Tensorflow CNN doesn't converge when training with fer2013 dataset












0















I planned to train facial expression classification with CNN in Tensorflow. My CNN model has been used to train MNIST dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01 to 0.000001), the loss(cross entropy) do not converge in fer2013 dataset.



My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.



I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?



My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10



# Read .csv files 
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0

print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()

# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)

# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)


def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")


def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")


def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})

labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})

correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result


IMG_SIZE = 48
EMO_SIZE = 7

keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)

input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)

input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])

# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32

# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32

# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64

# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64

# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128

# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072

# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)

# output -1*7
#######################################################################

# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)


with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset

print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()

for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))


The ouput during training and testing:



EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227


After tens of Epoches:



EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25









share|improve this question

























  • probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

    – Matthieu Brucher
    Nov 25 '18 at 13:28











  • @Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

    – Alfred Wei
    Nov 26 '18 at 1:02
















0















I planned to train facial expression classification with CNN in Tensorflow. My CNN model has been used to train MNIST dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01 to 0.000001), the loss(cross entropy) do not converge in fer2013 dataset.



My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.



I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?



My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10



# Read .csv files 
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0

print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()

# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)

# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)


def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")


def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")


def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})

labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})

correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result


IMG_SIZE = 48
EMO_SIZE = 7

keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)

input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)

input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])

# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32

# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32

# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64

# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64

# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128

# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072

# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)

# output -1*7
#######################################################################

# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)


with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset

print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()

for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))


The ouput during training and testing:



EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227


After tens of Epoches:



EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25









share|improve this question

























  • probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

    – Matthieu Brucher
    Nov 25 '18 at 13:28











  • @Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

    – Alfred Wei
    Nov 26 '18 at 1:02














0












0








0


1






I planned to train facial expression classification with CNN in Tensorflow. My CNN model has been used to train MNIST dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01 to 0.000001), the loss(cross entropy) do not converge in fer2013 dataset.



My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.



I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?



My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10



# Read .csv files 
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0

print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()

# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)

# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)


def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")


def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")


def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})

labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})

correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result


IMG_SIZE = 48
EMO_SIZE = 7

keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)

input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)

input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])

# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32

# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32

# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64

# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64

# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128

# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072

# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)

# output -1*7
#######################################################################

# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)


with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset

print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()

for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))


The ouput during training and testing:



EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227


After tens of Epoches:



EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25









share|improve this question
















I planned to train facial expression classification with CNN in Tensorflow. My CNN model has been used to train MNIST dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01 to 0.000001), the loss(cross entropy) do not converge in fer2013 dataset.



My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.



I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?



My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10



# Read .csv files 
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0

print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()

# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)

# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)


def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)


def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")


def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")


def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})

labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})

correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result


IMG_SIZE = 48
EMO_SIZE = 7

keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)

input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)

input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])

# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32

# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32

# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64

# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64

# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128

# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072

# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)

# output -1*7
#######################################################################

# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)


with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset

print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()

for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))


The ouput during training and testing:



EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227


After tens of Epoches:



EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25






python tensorflow deep-learning conv-neural-network






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 26 '18 at 2:34







Alfred Wei

















asked Nov 25 '18 at 13:13









Alfred WeiAlfred Wei

12




12













  • probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

    – Matthieu Brucher
    Nov 25 '18 at 13:28











  • @Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

    – Alfred Wei
    Nov 26 '18 at 1:02



















  • probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

    – Matthieu Brucher
    Nov 25 '18 at 13:28











  • @Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

    – Alfred Wei
    Nov 26 '18 at 1:02

















probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

– Matthieu Brucher
Nov 25 '18 at 13:28





probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

– Matthieu Brucher
Nov 25 '18 at 13:28













@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

– Alfred Wei
Nov 26 '18 at 1:02





@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

– Alfred Wei
Nov 26 '18 at 1:02












0






active

oldest

votes











Your Answer






StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});


}
});














draft saved

draft discarded


















StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53467823%2ftensorflow-cnn-doesnt-converge-when-training-with-fer2013-dataset%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown

























0






active

oldest

votes








0






active

oldest

votes









active

oldest

votes






active

oldest

votes
















draft saved

draft discarded




















































Thanks for contributing an answer to Stack Overflow!


  • Please be sure to answer the question. Provide details and share your research!

But avoid



  • Asking for help, clarification, or responding to other answers.

  • Making statements based on opinion; back them up with references or personal experience.


To learn more, see our tips on writing great answers.




draft saved


draft discarded














StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53467823%2ftensorflow-cnn-doesnt-converge-when-training-with-fer2013-dataset%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown





















































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown

































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown







Popular posts from this blog

404 Error Contact Form 7 ajax form submitting

How to know if a Active Directory user can login interactively

TypeError: fit_transform() missing 1 required positional argument: 'X'