Tensorflow CNN doesn't converge when training with fer2013 dataset

I planned to train facial expression classification with CNN in Tensorflow. My CNN model has been used to train MNIST dataset, and the outcome is pretty well(nearly 98% accuracy). However no matter how small the training rate I set(from 0.01 to 0.000001), the loss(cross entropy) do not converge in fer2013 dataset.

My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.

I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?

My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10

# Read .csv files 

######################################### 

train_csv_path = 'fer2013/valid_train.csv'

test_csv_path = 'fer2013/test.csv'

test_img_data = 

train_img_data = 

iterator = 0



print('Reading training dataset and testing dataset...')

readfile1 = open(test_csv_path, mode='r')

reader1 = csv.reader(readfile1)  

header = next(reader1)  

readfile2 = open(train_csv_path, mode='r')

reader2 = csv.reader(readfile2)  

header = next(reader2)  

for row in reader1:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    test_img_data.append(img_int)

for row in reader2:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    train_img_data.append(img_int)

test_img_data = np.asarray(test_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

train_img_data = np.asarray(train_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

print('Reading complete!')

print('Training dataset with shape ' + str(train_img_data.shape))

print('Testing dataset with shape ' + str(test_img_data.shape))

readfile1.close()

readfile2.close()



# 1. Define datasets with numpy array

train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)

train_dataset = train_dataset.batch(32)

train_dataset = train_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,

                                                  output_shapes=train_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets

train_init_op = train_iterator.make_initializer(train_dataset)



# 1. Define datasets with numpy array

test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)

test_dataset = test_dataset.batch(32)  #

test_dataset = test_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,

                                                  output_shapes=test_dataset.output_shapes)



# 3. An operation to initialize the iterator with (different) datasets

test_init_op = test_iterator.make_initializer(test_dataset)           

    #######################################################

    def filter_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def weight_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def bias_variable(shape):

        init = tf.constant(0.1, shape=shape)

        return tf.Variable(init)





    def conv2d(input, filter):

        return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")





    def max_pool_3x3_2(input):

        return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")





    def max_pool_5x5_1(input):

        return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")





    def max_pool_2x2_2(input):

        return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")





    def compute_accuracy(data):

        global prediction

        global input_y

        pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})



        labels = sess.run(input_y, feed_dict={input:data, keep_prob:1}) 



        correct_prediction = tf.equal(tf.argmax(pre, 1),

                                  tf.argmax(labels, 1))

        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        result = sess.run(accuracy)

        return result





    IMG_SIZE = 48

    EMO_SIZE = 7



    keep_prob = tf.placeholder(dtype=tf.float32)

    input = tf.placeholder(dtype=tf.float32)



    input_x = input[0:, 0:-1]   # Features 

    input_y = input[0:, -1:]    # Labels

    # transform input_y into one_hot_vector

    input_y = tf.reshape(input_y, shape=[-1])

    input_y = tf.cast(input_y, tf.int32)

    input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)



    input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])



    # My CNN 

    #############################################################

    # 1_conv

    filter1 = filter_variable(shape=[5, 5, 1, 32])

    b1 = bias_variable(shape=[32])

    output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32



    # 2_max_pool

    output_2_max_pool = max_pool_3x3_2(output_1_conv)   # output 22*22*32



    # 3_conv

    filter3 = filter_variable(shape=[5, 5, 32, 64])

    b3 = bias_variable(shape=[64])

    output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)

    # output 18*18*64



    # 4_max_pool

    output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64



    # 5_conv

    filter5 = filter_variable(shape=[4, 4, 64, 128])

    b5 = bias_variable(shape=[128])

    output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128



    # 6_fc with 3072 neurons

    W6 = weight_variable(shape=[15*15*128, 2048])

    W6 = tf.nn.dropout(W6, keep_prob)   # add dropout

    b6 = bias_variable(shape=[2048])

    output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])

    output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)

    # output -1*3072



    # 7_fc with 7 neurons

    W7 = weight_variable(shape=[2048, 7])

    W7 = tf.nn.dropout(W7, keep_prob)   # add dropout

    b7 = bias_variable(shape=[7])

    output_7_fc = tf.matmul(output_6_fc, W7) + b7

    prediction = tf.nn.softmax(output_7_fc)



    # output -1*7

    #######################################################################



    # loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))

    loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))

    train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)





    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        sess.run(train_init_op) # Initialize dataset

        sess.run(test_init_op)  # Initialize dataset



        print('Train start!')

        next_element = train_iterator.get_next()

        next_test_element = test_iterator.get_next()



        for i in range(100):

            element = sess.run(next_element)

            test_element = sess.run(next_test_element)

            sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})

            print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),

                  'Accuracy =', compute_accuracy(test_element))

The ouput during training and testing:

EPOCH 0, loss = 1822.4683 Accuracy = 0.227

EPOCH 1, loss = 1819.7567 Accuracy = 0.246

EPOCH 2, loss = 1799.698 Accuracy = 0.275

EPOCH 3, loss = 1815.156 Accuracy = 0.238

EPOCH 4, loss = 1815.1738 Accuracy = 0.261

EPOCH 5, loss = 1814.6595 Accuracy = 0.25

EPOCH 6, loss = 1799.3706 Accuracy = 0.235

EPOCH 7, loss = 1829.245 Accuracy = 0.21910113

EPOCH 8, loss = 1841.583 Accuracy = 0.227

After tens of Epoches:

EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113

EPOCH 88, loss = 1798.821 Accuracy = 0.228

EPOCH 89, loss = 1734.7308 Accuracy = 0.25

EPOCH 90, loss = 1801.3701 Accuracy = 0.275

EPOCH 91, loss = 1795.1626 Accuracy = 0.238

EPOCH 92, loss = 1754.9252 Accuracy = 0.261

EPOCH 93, loss = 1762.0444 Accuracy = 0.25

edited Nov 26 '18 at 2:34

asked Nov 25 '18 at 13:13

Alfred Wei

probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

– Matthieu Brucher
Nov 25 '18 at 13:28

@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

– Alfred Wei
Nov 26 '18 at 1:02

add a comment |

My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.

I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?

My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10

# Read .csv files 

######################################### 

train_csv_path = 'fer2013/valid_train.csv'

test_csv_path = 'fer2013/test.csv'

test_img_data = 

train_img_data = 

iterator = 0



print('Reading training dataset and testing dataset...')

readfile1 = open(test_csv_path, mode='r')

reader1 = csv.reader(readfile1)  

header = next(reader1)  

readfile2 = open(train_csv_path, mode='r')

reader2 = csv.reader(readfile2)  

header = next(reader2)  

for row in reader1:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    test_img_data.append(img_int)

for row in reader2:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    train_img_data.append(img_int)

test_img_data = np.asarray(test_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

train_img_data = np.asarray(train_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

print('Reading complete!')

print('Training dataset with shape ' + str(train_img_data.shape))

print('Testing dataset with shape ' + str(test_img_data.shape))

readfile1.close()

readfile2.close()



# 1. Define datasets with numpy array

train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)

train_dataset = train_dataset.batch(32)

train_dataset = train_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,

                                                  output_shapes=train_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets

train_init_op = train_iterator.make_initializer(train_dataset)



# 1. Define datasets with numpy array

test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)

test_dataset = test_dataset.batch(32)  #

test_dataset = test_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,

                                                  output_shapes=test_dataset.output_shapes)



# 3. An operation to initialize the iterator with (different) datasets

test_init_op = test_iterator.make_initializer(test_dataset)           

    #######################################################

    def filter_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def weight_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def bias_variable(shape):

        init = tf.constant(0.1, shape=shape)

        return tf.Variable(init)





    def conv2d(input, filter):

        return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")





    def max_pool_3x3_2(input):

        return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")





    def max_pool_5x5_1(input):

        return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")





    def max_pool_2x2_2(input):

        return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")





    def compute_accuracy(data):

        global prediction

        global input_y

        pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})



        labels = sess.run(input_y, feed_dict={input:data, keep_prob:1}) 



        correct_prediction = tf.equal(tf.argmax(pre, 1),

                                  tf.argmax(labels, 1))

        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        result = sess.run(accuracy)

        return result





    IMG_SIZE = 48

    EMO_SIZE = 7



    keep_prob = tf.placeholder(dtype=tf.float32)

    input = tf.placeholder(dtype=tf.float32)



    input_x = input[0:, 0:-1]   # Features 

    input_y = input[0:, -1:]    # Labels

    # transform input_y into one_hot_vector

    input_y = tf.reshape(input_y, shape=[-1])

    input_y = tf.cast(input_y, tf.int32)

    input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)



    input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])



    # My CNN 

    #############################################################

    # 1_conv

    filter1 = filter_variable(shape=[5, 5, 1, 32])

    b1 = bias_variable(shape=[32])

    output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32



    # 2_max_pool

    output_2_max_pool = max_pool_3x3_2(output_1_conv)   # output 22*22*32



    # 3_conv

    filter3 = filter_variable(shape=[5, 5, 32, 64])

    b3 = bias_variable(shape=[64])

    output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)

    # output 18*18*64



    # 4_max_pool

    output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64



    # 5_conv

    filter5 = filter_variable(shape=[4, 4, 64, 128])

    b5 = bias_variable(shape=[128])

    output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128



    # 6_fc with 3072 neurons

    W6 = weight_variable(shape=[15*15*128, 2048])

    W6 = tf.nn.dropout(W6, keep_prob)   # add dropout

    b6 = bias_variable(shape=[2048])

    output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])

    output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)

    # output -1*3072



    # 7_fc with 7 neurons

    W7 = weight_variable(shape=[2048, 7])

    W7 = tf.nn.dropout(W7, keep_prob)   # add dropout

    b7 = bias_variable(shape=[7])

    output_7_fc = tf.matmul(output_6_fc, W7) + b7

    prediction = tf.nn.softmax(output_7_fc)



    # output -1*7

    #######################################################################



    # loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))

    loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))

    train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)





    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        sess.run(train_init_op) # Initialize dataset

        sess.run(test_init_op)  # Initialize dataset



        print('Train start!')

        next_element = train_iterator.get_next()

        next_test_element = test_iterator.get_next()



        for i in range(100):

            element = sess.run(next_element)

            test_element = sess.run(next_test_element)

            sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})

            print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),

                  'Accuracy =', compute_accuracy(test_element))

The ouput during training and testing:

EPOCH 0, loss = 1822.4683 Accuracy = 0.227

EPOCH 1, loss = 1819.7567 Accuracy = 0.246

EPOCH 2, loss = 1799.698 Accuracy = 0.275

EPOCH 3, loss = 1815.156 Accuracy = 0.238

EPOCH 4, loss = 1815.1738 Accuracy = 0.261

EPOCH 5, loss = 1814.6595 Accuracy = 0.25

EPOCH 6, loss = 1799.3706 Accuracy = 0.235

EPOCH 7, loss = 1829.245 Accuracy = 0.21910113

EPOCH 8, loss = 1841.583 Accuracy = 0.227

After tens of Epoches:

EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113

EPOCH 88, loss = 1798.821 Accuracy = 0.228

EPOCH 89, loss = 1734.7308 Accuracy = 0.25

EPOCH 90, loss = 1801.3701 Accuracy = 0.275

EPOCH 91, loss = 1795.1626 Accuracy = 0.238

EPOCH 92, loss = 1754.9252 Accuracy = 0.261

EPOCH 93, loss = 1762.0444 Accuracy = 0.25

edited Nov 26 '18 at 2:34

asked Nov 25 '18 at 13:13

Alfred Wei

probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

– Matthieu Brucher
Nov 25 '18 at 13:28

@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

– Alfred Wei
Nov 26 '18 at 1:02

add a comment |

My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.

I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?

My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10

# Read .csv files 

######################################### 

train_csv_path = 'fer2013/valid_train.csv'

test_csv_path = 'fer2013/test.csv'

test_img_data = 

train_img_data = 

iterator = 0



print('Reading training dataset and testing dataset...')

readfile1 = open(test_csv_path, mode='r')

reader1 = csv.reader(readfile1)  

header = next(reader1)  

readfile2 = open(train_csv_path, mode='r')

reader2 = csv.reader(readfile2)  

header = next(reader2)  

for row in reader1:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    test_img_data.append(img_int)

for row in reader2:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    train_img_data.append(img_int)

test_img_data = np.asarray(test_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

train_img_data = np.asarray(train_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

print('Reading complete!')

print('Training dataset with shape ' + str(train_img_data.shape))

print('Testing dataset with shape ' + str(test_img_data.shape))

readfile1.close()

readfile2.close()



# 1. Define datasets with numpy array

train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)

train_dataset = train_dataset.batch(32)

train_dataset = train_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,

                                                  output_shapes=train_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets

train_init_op = train_iterator.make_initializer(train_dataset)



# 1. Define datasets with numpy array

test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)

test_dataset = test_dataset.batch(32)  #

test_dataset = test_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,

                                                  output_shapes=test_dataset.output_shapes)



# 3. An operation to initialize the iterator with (different) datasets

test_init_op = test_iterator.make_initializer(test_dataset)           

    #######################################################

    def filter_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def weight_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def bias_variable(shape):

        init = tf.constant(0.1, shape=shape)

        return tf.Variable(init)





    def conv2d(input, filter):

        return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")





    def max_pool_3x3_2(input):

        return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")





    def max_pool_5x5_1(input):

        return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")





    def max_pool_2x2_2(input):

        return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")





    def compute_accuracy(data):

        global prediction

        global input_y

        pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})



        labels = sess.run(input_y, feed_dict={input:data, keep_prob:1}) 



        correct_prediction = tf.equal(tf.argmax(pre, 1),

                                  tf.argmax(labels, 1))

        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        result = sess.run(accuracy)

        return result





    IMG_SIZE = 48

    EMO_SIZE = 7



    keep_prob = tf.placeholder(dtype=tf.float32)

    input = tf.placeholder(dtype=tf.float32)



    input_x = input[0:, 0:-1]   # Features 

    input_y = input[0:, -1:]    # Labels

    # transform input_y into one_hot_vector

    input_y = tf.reshape(input_y, shape=[-1])

    input_y = tf.cast(input_y, tf.int32)

    input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)



    input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])



    # My CNN 

    #############################################################

    # 1_conv

    filter1 = filter_variable(shape=[5, 5, 1, 32])

    b1 = bias_variable(shape=[32])

    output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32



    # 2_max_pool

    output_2_max_pool = max_pool_3x3_2(output_1_conv)   # output 22*22*32



    # 3_conv

    filter3 = filter_variable(shape=[5, 5, 32, 64])

    b3 = bias_variable(shape=[64])

    output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)

    # output 18*18*64



    # 4_max_pool

    output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64



    # 5_conv

    filter5 = filter_variable(shape=[4, 4, 64, 128])

    b5 = bias_variable(shape=[128])

    output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128



    # 6_fc with 3072 neurons

    W6 = weight_variable(shape=[15*15*128, 2048])

    W6 = tf.nn.dropout(W6, keep_prob)   # add dropout

    b6 = bias_variable(shape=[2048])

    output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])

    output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)

    # output -1*3072



    # 7_fc with 7 neurons

    W7 = weight_variable(shape=[2048, 7])

    W7 = tf.nn.dropout(W7, keep_prob)   # add dropout

    b7 = bias_variable(shape=[7])

    output_7_fc = tf.matmul(output_6_fc, W7) + b7

    prediction = tf.nn.softmax(output_7_fc)



    # output -1*7

    #######################################################################



    # loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))

    loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))

    train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)





    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        sess.run(train_init_op) # Initialize dataset

        sess.run(test_init_op)  # Initialize dataset



        print('Train start!')

        next_element = train_iterator.get_next()

        next_test_element = test_iterator.get_next()



        for i in range(100):

            element = sess.run(next_element)

            test_element = sess.run(next_test_element)

            sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})

            print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),

                  'Accuracy =', compute_accuracy(test_element))

The ouput during training and testing:

EPOCH 0, loss = 1822.4683 Accuracy = 0.227

EPOCH 1, loss = 1819.7567 Accuracy = 0.246

EPOCH 2, loss = 1799.698 Accuracy = 0.275

EPOCH 3, loss = 1815.156 Accuracy = 0.238

EPOCH 4, loss = 1815.1738 Accuracy = 0.261

EPOCH 5, loss = 1814.6595 Accuracy = 0.25

EPOCH 6, loss = 1799.3706 Accuracy = 0.235

EPOCH 7, loss = 1829.245 Accuracy = 0.21910113

EPOCH 8, loss = 1841.583 Accuracy = 0.227

After tens of Epoches:

EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113

EPOCH 88, loss = 1798.821 Accuracy = 0.228

EPOCH 89, loss = 1734.7308 Accuracy = 0.25

EPOCH 90, loss = 1801.3701 Accuracy = 0.275

EPOCH 91, loss = 1795.1626 Accuracy = 0.238

EPOCH 92, loss = 1754.9252 Accuracy = 0.261

EPOCH 93, loss = 1762.0444 Accuracy = 0.25

edited Nov 26 '18 at 2:34

asked Nov 25 '18 at 13:13

Alfred Wei

My dataset are training[14890, 48*48*1], and testing[7178, 48*48*1], each row contains 48*48 features and 1 labels.

I don't understand, it is because the initial value of weights and filters of each layer? Or I should try another way to calculate cross entropy?

My environment:
Python 3.6, Tensorflow-gpu 1.11.0, Windows 10

# Read .csv files 

######################################### 

train_csv_path = 'fer2013/valid_train.csv'

test_csv_path = 'fer2013/test.csv'

test_img_data = 

train_img_data = 

iterator = 0



print('Reading training dataset and testing dataset...')

readfile1 = open(test_csv_path, mode='r')

reader1 = csv.reader(readfile1)  

header = next(reader1)  

readfile2 = open(train_csv_path, mode='r')

reader2 = csv.reader(readfile2)  

header = next(reader2)  

for row in reader1:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    test_img_data.append(img_int)

for row in reader2:

    img_string = np.asarray(row[1].split())  # shape [48*48] string pixels

    img_int = [int(x) for x in img_string]  # shape [48*48]

    img_int.append(int(row[0]))  # shape [48*48+1], the last bit is 'emotion'

    train_img_data.append(img_int)

test_img_data = np.asarray(test_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

train_img_data = np.asarray(train_img_data, dtype=np.float32)  # shape [-1, 48*48+1]

print('Reading complete!')

print('Training dataset with shape ' + str(train_img_data.shape))

print('Testing dataset with shape ' + str(test_img_data.shape))

readfile1.close()

readfile2.close()



# 1. Define datasets with numpy array

train_dataset = tf.data.Dataset.from_tensor_slices(train_img_data)

train_dataset = train_dataset.batch(32)

train_dataset = train_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

train_iterator = tf.data.Iterator.from_structure(output_types=train_dataset.output_types,

                                                  output_shapes=train_dataset.output_shapes)

# 3. An operation to initialize the iterator with (different) datasets

train_init_op = train_iterator.make_initializer(train_dataset)



# 1. Define datasets with numpy array

test_dataset = tf.data.Dataset.from_tensor_slices(test_img_data)

test_dataset = test_dataset.batch(32)  #

test_dataset = test_dataset.repeat()  # Make dataset loop infinitely

# 2. Define a reinitializable iterator (can be initialized for multiple times)

test_iterator = tf.data.Iterator.from_structure(output_types=test_dataset.output_types,

                                                  output_shapes=test_dataset.output_shapes)



# 3. An operation to initialize the iterator with (different) datasets

test_init_op = test_iterator.make_initializer(test_dataset)           

    #######################################################

    def filter_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def weight_variable(shape):

        init = tf.truncated_normal(shape=shape, stddev=0.01)

        return tf.Variable(init)





    def bias_variable(shape):

        init = tf.constant(0.1, shape=shape)

        return tf.Variable(init)





    def conv2d(input, filter):

        return tf.nn.conv2d(input=input, filter=filter, strides=[1, 1, 1, 1], padding="VALID")





    def max_pool_3x3_2(input):

        return tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")





    def max_pool_5x5_1(input):

        return tf.nn.max_pool(input, ksize=[1, 5, 5, 1], strides=[1, 1, 1, 1], padding="SAME")





    def max_pool_2x2_2(input):

        return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")





    def compute_accuracy(data):

        global prediction

        global input_y

        pre = sess.run(prediction, feed_dict={input:data, keep_prob:1})



        labels = sess.run(input_y, feed_dict={input:data, keep_prob:1}) 



        correct_prediction = tf.equal(tf.argmax(pre, 1),

                                  tf.argmax(labels, 1))

        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        result = sess.run(accuracy)

        return result





    IMG_SIZE = 48

    EMO_SIZE = 7



    keep_prob = tf.placeholder(dtype=tf.float32)

    input = tf.placeholder(dtype=tf.float32)



    input_x = input[0:, 0:-1]   # Features 

    input_y = input[0:, -1:]    # Labels

    # transform input_y into one_hot_vector

    input_y = tf.reshape(input_y, shape=[-1])

    input_y = tf.cast(input_y, tf.int32)

    input_y = tf.one_hot(input_y, depth = EMO_SIZE, dtype=tf.float32)



    input_x = tf.reshape(tensor=input_x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])



    # My CNN 

    #############################################################

    # 1_conv

    filter1 = filter_variable(shape=[5, 5, 1, 32])

    b1 = bias_variable(shape=[32])

    output_1_conv = tf.nn.relu(conv2d(input=input_x, filter=filter1) + b1) # output 44*44*32



    # 2_max_pool

    output_2_max_pool = max_pool_3x3_2(output_1_conv)   # output 22*22*32



    # 3_conv

    filter3 = filter_variable(shape=[5, 5, 32, 64])

    b3 = bias_variable(shape=[64])

    output_3_conv = tf.nn.relu(conv2d(input=output_2_max_pool, filter=filter3) + b3)

    # output 18*18*64



    # 4_max_pool

    output_4_max_pool = max_pool_5x5_1(input=output_3_conv) # output 18*18*64



    # 5_conv

    filter5 = filter_variable(shape=[4, 4, 64, 128])

    b5 = bias_variable(shape=[128])

    output_5_conv = tf.nn.relu(conv2d(input=output_4_max_pool, filter=filter5) + b5) # output 15*15*128



    # 6_fc with 3072 neurons

    W6 = weight_variable(shape=[15*15*128, 2048])

    W6 = tf.nn.dropout(W6, keep_prob)   # add dropout

    b6 = bias_variable(shape=[2048])

    output_5_conv_flat = tf.reshape(output_5_conv, shape=[-1, 15*15*128])

    output_6_fc = tf.nn.relu(tf.matmul(output_5_conv_flat, W6) + b6)

    # output -1*3072



    # 7_fc with 7 neurons

    W7 = weight_variable(shape=[2048, 7])

    W7 = tf.nn.dropout(W7, keep_prob)   # add dropout

    b7 = bias_variable(shape=[7])

    output_7_fc = tf.matmul(output_6_fc, W7) + b7

    prediction = tf.nn.softmax(output_7_fc)



    # output -1*7

    #######################################################################



    # loss = tf.reduce_mean(-tf.reduce_sum(input_y * tf.log(prediction)))

    loss = -tf.reduce_sum(input_y*tf.log(tf.clip_by_value(prediction,1e-10,1.0)))

    train_step = tf.train.GradientDescentOptimizer(0.00001).minimize(loss)





    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        sess.run(train_init_op) # Initialize dataset

        sess.run(test_init_op)  # Initialize dataset



        print('Train start!')

        next_element = train_iterator.get_next()

        next_test_element = test_iterator.get_next()



        for i in range(100):

            element = sess.run(next_element)

            test_element = sess.run(next_test_element)

            sess.run(train_step, feed_dict={input: element, keep_prob: 0.5})

            print('EPOCH %d, loss =' % i, sess.run(loss, feed_dict={input: element, keep_prob: 0.5}),

                  'Accuracy =', compute_accuracy(test_element))

The ouput during training and testing:

EPOCH 0, loss = 1822.4683 Accuracy = 0.227

EPOCH 1, loss = 1819.7567 Accuracy = 0.246

EPOCH 2, loss = 1799.698 Accuracy = 0.275

EPOCH 3, loss = 1815.156 Accuracy = 0.238

EPOCH 4, loss = 1815.1738 Accuracy = 0.261

EPOCH 5, loss = 1814.6595 Accuracy = 0.25

EPOCH 6, loss = 1799.3706 Accuracy = 0.235

EPOCH 7, loss = 1829.245 Accuracy = 0.21910113

EPOCH 8, loss = 1841.583 Accuracy = 0.227

After tens of Epoches:

EPOCH 87, loss = 1786.2544 Accuracy = 0.21910113

EPOCH 88, loss = 1798.821 Accuracy = 0.228

EPOCH 89, loss = 1734.7308 Accuracy = 0.25

EPOCH 90, loss = 1801.3701 Accuracy = 0.275

EPOCH 91, loss = 1795.1626 Accuracy = 0.238

EPOCH 92, loss = 1754.9252 Accuracy = 0.261

EPOCH 93, loss = 1762.0444 Accuracy = 0.25

python tensorflow deep-learning conv-neural-network

edited Nov 26 '18 at 2:34

asked Nov 25 '18 at 13:13

Alfred Wei

edited Nov 26 '18 at 2:34

asked Nov 25 '18 at 13:13

Alfred Wei

edited Nov 26 '18 at 2:34

asked Nov 25 '18 at 13:13

Alfred Wei

asked Nov 25 '18 at 13:13

Alfred Wei

asked Nov 25 '18 at 13:13

Alfred Wei

probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

– Matthieu Brucher
Nov 25 '18 at 13:28

@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

– Alfred Wei
Nov 26 '18 at 1:02

add a comment |

probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

– Matthieu Brucher
Nov 25 '18 at 13:28

@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

– Alfred Wei
Nov 26 '18 at 1:02

probably want to make the clip symmetric (tf.clip_by_value(prediction,1e-10,1.0-1e-10)), what happens if you use dense and conv2dinstead? (look at our example at github.com/mbrucher/BuildingMachineLearningSystemsWithPython/…). Also seems like lots of nodes for MNIS, then we don't know the ranges for the new dataset (no preprocessing shown).

– Matthieu Brucher
Nov 25 '18 at 13:28

@Matthieu Brucher, thanks for your advice. I tried to replace all my layers by dense and conv2d, still get the same outcome... Also I've update my code about preprocessing.

– Alfred Wei
Nov 26 '18 at 1:02

add a comment |

0

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53467823%2ftensorflow-cnn-doesnt-converge-when-training-with-fer2013-dataset%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

0

active

oldest

votes

0

active

oldest

votes

draft saved

draft discarded

Thanks for contributing an answer to Stack Overflow!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

搜尋此網誌

Tukukkk