I am new to tensorflow and trying to train the following two layer network. It seems it is not working as cross entropy is not decreasing as iteration. I think I am screwed up connecting hidden layer to output layer. Please help me if you can see the problem,
import tensorflow as tf
from scipy.io import loadmat
import numpy as np
import sys
x = loadmat('../mnist_data/ex4data1.mat')
X = x['X']
# one hot conversion
y_temp = x['y']
y_temp = np.reshape(y_temp, (len(y_temp),))
y = np.zeros((len(y_temp),10))
y[np.arange(len(y_temp)), y_temp-1] = 1.
input_size = 400
hidden1_size = 25
output_size = 10
num_iters = 50
reg_alpha = 0.05
x = tf.placeholder(tf.float32, [None, input_size], name='data')
W1 = tf.Variable(tf.zeros([hidden1_size, input_size], tf.float32, name='weights_1st_layer'))
b1 = tf.Variable(tf.zeros([hidden1_size], tf.float32), name='bias_layer_1')
W2 = tf.Variable(tf.zeros([output_size, hidden1_size], tf.float32, name='weights_2nd_layer'))
b2 = tf.Variable(tf.zeros([output_size], tf.float32), name='bias_layer_2')
hidden_op = tf.nn.relu(tf.add(tf.matmul(x, W1, transpose_b=True), b1))
output_op = tf.matmul(hidden_op, W2, transpose_b=True) + b2
pred = tf.nn.softmax(output_op)
y_ = tf.placeholder(tf.float32, [None, 10], name='actual_labels')
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
labels=y_, logits=output_op))
train_step = tf.train.GradientDescentOptimizer(reg_alpha).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for _ in range(50):
print ('training..', _)
print (sess.run([train_step, cross_entropy], feed_dict={x : X, y_ : y}))
corr_pred = tf.equal(tf.argmax(pred, axis=1), tf.argmax(y_, axis=1))
acc = tf.reduce_mean(tf.cast(corr_pred, tf.float32))
print (sess.run(acc, feed_dict={x:X, y_:y}))
sess.close()