python - Tensorflow CNN 实现的准确性较差

我正在尝试在 Tensorflow 中实现一个 5 层深度卷积神经网络,其中包含 3 个卷积层,后跟 2 个全连接层。我当前的实现如下。

def deepnn(x):
    """Build the 3-conv + 2-fully-connected classifier graph.

    Args:
        x: flat input batch; reshaped here to
            [-1, FLAGS.img_width, FLAGS.img_height, FLAGS.img_channels].

    Returns:
        A tuple (y_conv, img_summary): the softmax of the last fully
        connected layer's output, and the image summary op created for the
        reshaped input.
    """

    def conv_stage(scope, inputs, in_channels, out_channels, pool):
        # One 5x5 convolution (padding argument 2) + bias + ReLU, then `pool`.
        with tf.variable_scope(scope):
            kernel = weight_variable([5, 5, in_channels, out_channels])
            offset = bias_variable([out_channels])
            activated = tf.nn.relu(conv2d(inputs, kernel, 2) + offset)
            return pool(activated)

    # NOTE(review): dimensions are given as (width, height); tf image ops
    # usually expect (height, width) -- harmless for square images, confirm.
    image_shape = [-1, FLAGS.img_width, FLAGS.img_height, FLAGS.img_channels]
    x_image = tf.reshape(x, image_shape)
    img_summary = tf.summary.image('Input_images', x_image)

    # Two average-pooled stages followed by one max-pooled stage.
    features = conv_stage('Conv_1', x_image, FLAGS.img_channels, 32, avg_pool_3x3)
    features = conv_stage('Conv_2', features, 32, 32, avg_pool_3x3)
    features = conv_stage('Conv_3', features, 32, 64, max_pool_3x3)

    with tf.variable_scope('FC_1'):
        # Assumes the last pooled map is 4x4 spatially with 64 channels
        # (e.g. 32x32 inputs) -- TODO confirm against the dataset.
        flat_size = 4 * 4 * 64
        flattened = tf.reshape(features, [-1, flat_size])
        fc1_weights = weight_variable([flat_size, 64])
        fc1_biases = bias_variable([64])
        hidden = tf.nn.relu(tf.matmul(flattened, fc1_weights) + fc1_biases)

    with tf.variable_scope('FC_2'):
        fc2_weights = weight_variable([64, FLAGS.num_classes])
        fc2_biases = bias_variable([FLAGS.num_classes])
        logits = tf.matmul(hidden, fc2_weights) + fc2_biases

    with tf.variable_scope('softmax'):
        y_conv = tf.nn.softmax(logits)

    return y_conv, img_summary

def conv2d(x, W, p):
    """Apply a stride-1 2-D convolution with `p` zeros of padding per side.

    The zero padding is applied to the *input*, on both sides of dimensions
    1 and 2, before the VALID convolution, so outputs near the border are
    computed from real input values. Padding the convolution result instead
    yields a tensor of the same shape, but its outer `p`-wide frame is
    constant zeros and the border of the input never reaches the output
    centre-aligned.

    Args:
        x: 4-D input tensor; dimensions 1 and 2 are the ones padded.
        W: filter tensor handed to tf.nn.conv2d.
        p: number of zeros added on each side of dimensions 1 and 2.

    Returns:
        The output tensor of the convolution over the padded input.
    """
    paddings = tf.constant([[0, 0], [p, p], [p, p], [0, 0]])
    padded_input = tf.pad(x, paddings, "CONSTANT")
    return tf.nn.conv2d(padded_input, W, strides=[1, 1, 1, 1],
                        padding='VALID', name='convolution')

def avg_pool_3x3(x):
    """Average-pool `x` with a 3x3 window and stride 2, then zero-pad.

    After the VALID pooling, one zero is appended at the end of dimensions
    1 and 2 of the pooled tensor.
    """
    pooled = tf.nn.avg_pool(
        x,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='VALID',
        name='pooling',
    )
    # NOTE(review): the padding is applied to the pooled output, not to the
    # input -- confirm this is the intended placement.
    trailing_pad = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
    return tf.pad(pooled, trailing_pad, "CONSTANT")

def max_pool_3x3(x):
    """Max-pool `x` with a 3x3 window and stride 2, then zero-pad.

    After the VALID pooling, one zero is appended at the end of dimensions
    1 and 2 of the pooled tensor.
    """
    pooled = tf.nn.max_pool(
        x,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='VALID',
        name='pooling2',
    )
    # NOTE(review): the padding is applied to the pooled output, not to the
    # input -- confirm this is the intended placement.
    trailing_pad = tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]])
    return tf.pad(pooled, trailing_pad, "CONSTANT")

def weight_variable(shape):
    """Create a variable named 'weights' drawn uniformly from [-0.05, 0.05)."""
    return tf.Variable(
        tf.random_uniform(shape, minval=-0.05, maxval=0.05),
        name='weights',
    )

def bias_variable(shape):
    """Create a variable named 'biases' drawn uniformly from [-0.05, 0.05)."""
    return tf.Variable(
        tf.random_uniform(shape, minval=-0.05, maxval=0.05),
        name='biases',
    )

def main(_):
    """Load the dataset and build the loss, training and summary ops.

    Args:
        _: unused (presumably the argv list passed by the app runner --
            confirm against the caller).
    """

    # NOTE(security): pickle executes arbitrary code while loading; only use
    # a dataset.pkl that comes from a trusted source.
    # The context manager closes the file handle once loading is done.
    with open('dataset.pkl', 'rb') as dataset_file:
        dataset = pickle.load(dataset_file, encoding='latin1')
    train_dataset = dataset[0]

    learning_rate = 0.01
    current_validation_acc = 1

    with tf.variable_scope('inputs'):
        x = tf.placeholder(tf.float32, [None, FLAGS.img_width * FLAGS.img_height * FLAGS.img_channels])
        y_ = tf.placeholder(tf.float32, [None, FLAGS.num_classes])

    y_conv, img_summary = deepnn(x)

    with tf.variable_scope('softmax_loss'):
        # Per example: the predicted probability selected by the label row y_
        # (the true-class probability when y_ is one-hot).
        true_class_prob = tf.reduce_sum(tf.multiply(y_conv, y_), 1)
        # The epsilon keeps log() finite when that probability underflows to
        # 0; otherwise the loss (and every gradient) becomes inf/NaN.
        softmax_loss = tf.reduce_mean(tf.negative(tf.log(true_class_prob + 1e-10)))

    tf.add_to_collection('losses', softmax_loss)
    loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

    train_step = tf.train.MomentumOptimizer(learning_rate,FLAGS.momentum).minimize(loss)
    # A prediction counts as correct when the arg-max of the output matches
    # the arg-max of the label row.
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
    loss_summary = tf.summary.scalar('Loss', loss)
    acc_summary = tf.summary.scalar('Accuracy', accuracy)

由于某种未知的原因,该模型的准确率似乎始终无法提高到 10% 以上。我绞尽脑汁也没能找出原因。我使用的是 softmax 损失函数(如 here 所述)和动量优化器,所用的数据集是 GTSRB dataset。

虽然我可以加入各种深度学习技巧(例如自适应学习率等)来提高准确率,但我想不通为什么这个基本的 CNN 模型表现如此糟糕。




请参考 this question 中给出的正确公式。不过,更简单的做法是直接使用 tf.nn.softmax_cross_entropy_with_logits(同时去掉 y_conv 上的 softmax,因为该损失函数内部已经应用了 softmax)。

PS:在我看来这个 CNN 架构没有问题,只要超参数设置得当,准确率应该可以达到 60%-70%。

