Tensorflow CNN doesn't converge when training with fer2013 dataset
I planned to train facial expression classification with CNN in Tensorflow
. My CNN model has been used to train MNIST
dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01
to 0.000001
), the loss(cross entropy) do not converge in fer2013
dataset.
My dataset are training[14890, 48*48*1]
, and testing[7178, 48*48*1]
, each row contains 48*48 features and 1 labels.
I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?
My environment:
Python 3.6
, Tensorflow-gpu 1.11.0
, Windows 10
# Read .csv files
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0
print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()
# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)
# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)
def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")
def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})
labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})
correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result
IMG_SIZE = 48
EMO_SIZE = 7
keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)
input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)
input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])
# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32
# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32
# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64
# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64
# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128
# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072
# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)
# output -1*7
#######################################################################
# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset
print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()
for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))
The ouput during training and testing:
EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227
After tens of Epoches:
EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25
python tensorflow deep-learning conv-neural-network
add a comment |
I planned to train facial expression classification with CNN in Tensorflow
. My CNN model has been used to train MNIST
dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01
to 0.000001
), the loss(cross entropy) do not converge in fer2013
dataset.
My dataset are training[14890, 48*48*1]
, and testing[7178, 48*48*1]
, each row contains 48*48 features and 1 labels.
I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?
My environment:
Python 3.6
, Tensorflow-gpu 1.11.0
, Windows 10
# Read .csv files
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0
print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()
# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)
# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)
def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")
def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})
labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})
correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result
IMG_SIZE = 48
EMO_SIZE = 7
keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)
input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)
input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])
# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32
# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32
# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64
# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64
# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128
# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072
# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)
# output -1*7
#######################################################################
# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset
print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()
for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))
The ouput during training and testing:
EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227
After tens of Epoches:
EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25
python tensorflow deep-learning conv-neural-network
probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)
), what happens if you usedense
andconv2d
instead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).
– Matthieu Brucher
Nov 25 '18 at 13:28
@Matthieu Brucher, thanks for your advice. I tried to replace all my layers bydense
andconv2d
, still get the same outcome... Also I've update my code about preprocessing.
– Alfred Wei
Nov 26 '18 at 1:02
add a comment |
I planned to train facial expression classification with CNN in Tensorflow
. My CNN model has been used to train MNIST
dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01
to 0.000001
), the loss(cross entropy) do not converge in fer2013
dataset.
My dataset are training[14890, 48*48*1]
, and testing[7178, 48*48*1]
, each row contains 48*48 features and 1 labels.
I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?
My environment:
Python 3.6
, Tensorflow-gpu 1.11.0
, Windows 10
# Read .csv files
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0
print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()
# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)
# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)
def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")
def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})
labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})
correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result
IMG_SIZE = 48
EMO_SIZE = 7
keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)
input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)
input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])
# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32
# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32
# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64
# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64
# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128
# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072
# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)
# output -1*7
#######################################################################
# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset
print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()
for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))
The ouput during training and testing:
EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227
After tens of Epoches:
EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25
python tensorflow deep-learning conv-neural-network
I planned to train facial expression classification with CNN in Tensorflow
. My CNN model has been used to train MNIST
dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01
to 0.000001
), the loss(cross entropy) do not converge in fer2013
dataset.
My dataset are training[14890, 48*48*1]
, and testing[7178, 48*48*1]
, each row contains 48*48 features and 1 labels.
I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?
My environment:
Python 3.6
, Tensorflow-gpu 1.11.0
, Windows 10
# Read .csv files
#########################################
train_csv_path = 'fer2013/valid_train.csv'
test_csv_path = 'fer2013/test.csv'
test_img_data =
train_img_data =
iterator = 0
print('Reading training dataset and testing dataset...')
readfile1 = open(test_csv_path, mode='r')
reader1 = csv.reader(readfile1)
header = next(reader1)
readfile2 = open(train_csv_path, mode='r')
reader2 = csv.reader(readfile2)
header = next(reader2)
for row in reader1:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
test_img_data.append(img_int)
for row in reader2:
img_string = np.asarray(row[1].split()) # shape [48*48] string pixels
img_int = [int(x) for x in img_string] # shape [48*48]
img_int.append(int(row[0])) # shape [48*48+1], the last bit is 'emotion'
train_img_data.append(img_int)
test_img_data = np.asarray(test_img_data, dtype=np.float32) # shape [-1, 48*48+1]
train_img_data = np.asarray(train_img_data, dtype=np.float32) # shape [-1, 48*48+1]
print('Reading complete!')
print('Training dataset with shape ' + str(train_img_data.shape))
print('Testing dataset with shape ' + str(test_img_data.shape))
readfile1.close()
readfile2.close()
# 1. Define datasets with numpy array
train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,
output_shapes=train_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
train_init_op = train_iterator.make_initializer(train_dataset)
# 1. Define datasets with numpy array
test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)
test_dataset = test_dataset.batch(32) #
test_dataset = test_dataset.repeat() # Make dataset loop infinitely
# 2. Define a reinitializable iterator (can be initialized for multiple times)
test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,
output_shapes=test_dataset.output_shapes)
# 3. An operation to initialize the iterator with (different) datasets
test_init_op = test_iterator.make_initializer(test_dataset)
#######################################################
def filter_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def weight_variable(shape):
init = tf.truncated_normal(shape=shape, stddev=0.01)
return tf.Variable(init)
def bias_variable(shape):
init = tf.constant(0.1, shape=shape)
return tf.Variable(init)
def conv2d(input, filter):
return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")
def max_pool_3x3_2(input):
return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
def max_pool_5x5_1(input):
return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")
def max_pool_2x2_2(input):
return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def compute_accuracy(data):
global prediction
global input_y
pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})
labels = sess.run(input_y, feed_dict={input:data, keep_prob:1})
correct_prediction = tf.equal(tf.argmax(pre, 1),
tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy)
return result
IMG_SIZE = 48
EMO_SIZE = 7
keep_prob = tf.placeholder(dtype=tf.float32)
input = tf.placeholder(dtype=tf.float32)
input_x = input[0:, 0:-1] # Features
input_y = input[0:, -1:] # Labels
# transform input_y into one_hot_vector
input_y = tf.reshape(input_y, shape=[-1])
input_y = tf.cast(input_y, tf.int32)
input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)
input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])
# My CNN
#############################################################
# 1_conv
filter1 = filter_variable(shape=[5, 5, 1, 32])
b1 = bias_variable(shape=[32])
output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32
# 2_max_pool
output_2_max_pool = max_pool_3x3_2(output_1_conv) # output 22*22*32
# 3_conv
filter3 = filter_variable(shape=[5, 5, 32, 64])
b3 = bias_variable(shape=[64])
output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)
# output 18*18*64
# 4_max_pool
output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64
# 5_conv
filter5 = filter_variable(shape=[4, 4, 64, 128])
b5 = bias_variable(shape=[128])
output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128
# 6_fc with 3072 neurons
W6 = weight_variable(shape=[15*15*128, 2048])
W6 = tf.nn.dropout(W6, keep_prob) # add dropout
b6 = bias_variable(shape=[2048])
output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])
output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)
# output -1*3072
# 7_fc with 7 neurons
W7 = weight_variable(shape=[2048, 7])
W7 = tf.nn.dropout(W7, keep_prob) # add dropout
b7 = bias_variable(shape=[7])
output_7_fc = tf.matmul(output_6_fc, W7) + b7
prediction = tf.nn.softmax(output_7_fc)
# output -1*7
#######################################################################
# loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))
loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(train_init_op) # Initialize dataset
sess.run(test_init_op) # Initialize dataset
print('Train start!')
next_element = train_iterator.get_next()
next_test_element = test_iterator.get_next()
for i in range(100):
element = sess.run(next_element)
test_element = sess.run(next_test_element)
sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})
print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),
'Accuracy =', compute_accuracy(test_element))
The ouput during training and testing:
EPOCH 0, loss = 1822.4683 Accuracy = 0.227
EPOCH 1, loss = 1819.7567 Accuracy = 0.246
EPOCH 2, loss = 1799.698 Accuracy = 0.275
EPOCH 3, loss = 1815.156 Accuracy = 0.238
EPOCH 4, loss = 1815.1738 Accuracy = 0.261
EPOCH 5, loss = 1814.6595 Accuracy = 0.25
EPOCH 6, loss = 1799.3706 Accuracy = 0.235
EPOCH 7, loss = 1829.245 Accuracy = 0.21910113
EPOCH 8, loss = 1841.583 Accuracy = 0.227
After tens of Epoches:
EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113
EPOCH 88, loss = 1798.821 Accuracy = 0.228
EPOCH 89, loss = 1734.7308 Accuracy = 0.25
EPOCH 90, loss = 1801.3701 Accuracy = 0.275
EPOCH 91, loss = 1795.1626 Accuracy = 0.238
EPOCH 92, loss = 1754.9252 Accuracy = 0.261
EPOCH 93, loss = 1762.0444 Accuracy = 0.25
python tensorflow deep-learning conv-neural-network
python tensorflow deep-learning conv-neural-network
edited Nov 26 '18 at 2:34
Alfred Wei
asked Nov 25 '18 at 13:13
Alfred WeiAlfred Wei
12
12
probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)
), what happens if you usedense
andconv2d
instead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).
– Matthieu Brucher
Nov 25 '18 at 13:28
@Matthieu Brucher, thanks for your advice. I tried to replace all my layers bydense
andconv2d
, still get the same outcome... Also I've update my code about preprocessing.
– Alfred Wei
Nov 26 '18 at 1:02
add a comment |
probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)
), what happens if you usedense
andconv2d
instead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).
– Matthieu Brucher
Nov 25 '18 at 13:28
@Matthieu Brucher, thanks for your advice. I tried to replace all my layers bydense
andconv2d
, still get the same outcome... Also I've update my code about preprocessing.
– Alfred Wei
Nov 26 '18 at 1:02
probably want to make the clip symmetric (
tf.clip_by_value(prediction,1e-10,1.0-1e-10)
), what happens if you use dense
and conv2d
instead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).– Matthieu Brucher
Nov 25 '18 at 13:28
probably want to make the clip symmetric (
tf.clip_by_value(prediction,1e-10,1.0-1e-10)
), what happens if you use dense
and conv2d
instead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).– Matthieu Brucher
Nov 25 '18 at 13:28
@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by
dense
and conv2d
, still get the same outcome... Also I've update my code about preprocessing.– Alfred Wei
Nov 26 '18 at 1:02
@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by
dense
and conv2d
, still get the same outcome... Also I've update my code about preprocessing.– Alfred Wei
Nov 26 '18 at 1:02
add a comment |
0
active
oldest
votes
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53467823%2ftensorflow-cnn-doesnt-converge-when-training-with-fer2013-dataset%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
0
active
oldest
votes
0
active
oldest
votes
active
oldest
votes
active
oldest
votes
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53467823%2ftensorflow-cnn-doesnt-converge-when-training-with-fer2013-dataset%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
probably want to make the clip symmetric (
tf.clip_by_value(prediction,1e-10,1.0-1e-10)
), what happens if you usedense
andconv2d
instead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).– Matthieu Brucher
Nov 25 '18 at 13:28
@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by
dense
andconv2d
, still get the same outcome... Also I've update my code about preprocessing.– Alfred Wei
Nov 26 '18 at 1:02