python - 带有 Tensorflow 的 CNN,CIFAR-10 的准确率低且没有改进

标签 python tensorflow neural-network deep-learning

在 CIFAR-10 上运行 3 层卷积网络的第一个训练阶段时,我既无法达到足够高的验证精度,也无法最小化目标函数。

具体来说,准确度在第一次迭代时发生变化,然后在接下来的迭代中稳定在 8.7%。不同寻常的是,我还训练了一个 2 层的全连接网络,它的表现要好得多,在验证集上的准确率始终保持在 43% 左右。

注意:大部分代码来自 Jupyter notebook,该 notebook 旨在作为斯坦福 CS231n Convolutional Neural Networks for Visual Recognition 作业的一部分提供准系统 Tensorflow(和 Keras)的介绍。虽然我既不是这门类(class)的学生也不是大学的学生,但我这样做纯粹是出于体验目的,并且出于我对 CV/深度学习的新生兴趣。 我的贡献只是前向传递和网络参数初始化的实现。

notebook 的作者发表评论指出,如果正确实现,此模型应在第一个 epoch 后达到 40% 以上的准确率,而无需任何超参数调整。

实现说明

  • 49,000/1000:训练/验证拆分,批量大小 = 64

  • 权重使用 Kaiming 归一化进行初始化,偏差使用 0 进行初始化

  • 学习率=3e-3

  • 这里详细介绍了 convnet 的每一层:

    1. 具有 32 个 5x5 过滤器的卷积层(带偏置),带零填充 2
    2. ReLU 卷积层(带偏差),带 16 个 3x3 过滤器,零填充 1

    3. ReLU 全连接层(带偏差)计算 10 个类的分数

代码

(我的写在'TODO'评论 block 之间)

import tensorflow as tf
import numpy as np


def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    cifar10 = tf.keras.datasets.cifar10.load_data()
    (X_train, y_train), (X_test, y_test) = cifar10
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test


class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        if self.shuffle:
            np.random.shuffle(idxs)
        return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))


def flatten(x):
    N = tf.shape(x)[0]
    return tf.reshape(x, (N, -1))


def three_layer_convnet(x, params):
    conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params
    scores = None
    ############################################################################
    # TODO: Implement the forward pass for the three-layer ConvNet.            #
    ############################################################################
    h1_conv = tf.nn.conv2d(x, 
                           conv_w1 + conv_b1, 
                           strides=[1, 1, 1, 1], 
                           padding='SAME'
    )
    h1 = tf.nn.relu(h1_conv)

    h2_conv = tf.nn.conv2d(h1, 
                           conv_w2 + conv_b2, 
                           strides=[1, 1, 1, 1], 
                           padding='SAME'
    )
    h2 = tf.nn.relu(h2_conv)

    fc_params = flatten(fc_w + fc_b)
    h2 = flatten(h2)
    scores = tf.matmul(h2, fc_params)
    ############################################################################
    #                              END OF YOUR CODE                            #
    ############################################################################
    return scores


def training_step(scores, y, params, learning_rate):
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=scores)
    loss = tf.reduce_mean(losses)
    grad_params = tf.gradients(loss, params)

    new_weights = []   
    for w, grad_w in zip(params, grad_params):
        new_w = tf.assign_sub(w, learning_rate * grad_w)
        new_weights.append(new_w)

    with tf.control_dependencies(new_weights):
        return tf.identity(loss)


def check_accuracy(sess, dset, x, scores, is_training=None):
    num_correct, num_samples = 0, 0
    for x_batch, y_batch in dset:
        feed_dict = {x: x_batch, is_training: 0}
        scores_np = sess.run(scores, feed_dict=feed_dict)
        y_pred = scores_np.argmax(axis=1)
        num_samples += x_batch.shape[0]
        num_correct += (y_pred == y_batch).sum()
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))


def kaiming_normal(shape):
    if len(shape) == 2:
        fan_in, fan_out = shape[0], shape[1]
    elif len(shape) == 4:
        fan_in, fan_out = np.prod(shape[:3]), shape[3]
    return tf.random_normal(shape) * np.sqrt(2.0 / fan_in)


def three_layer_convnet_init():
    params = None
    ############################################################################
    # TODO: Initialize the parameters of the three-layer network.              #
    ############################################################################
    conv_w1 = tf.Variable(kaiming_normal((5, 5, 3, 32)))
    conv_b1 = tf.Variable(tf.zeros((32,)))
    conv_w2 = tf.Variable(kaiming_normal((3, 3, 32, 16)))
    conv_b2 = tf.Variable(tf.zeros((16,)))
    fc_w = tf.Variable(kaiming_normal((32 * 32 * 16, 10)))
    fc_b = tf.Variable(tf.zeros((10,)))
    params = [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]
    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################
    return params


def main():
    learning_rate = 3e-3
    tf.reset_default_graph()
    is_training = tf.placeholder(tf.bool, name='is_training')

    X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()
    train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
    test_dset = Dataset(X_test, y_test, batch_size=64)
    val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
    print('Train data shape: ', X_train.shape)
    print('Train labels shape: ', y_train.shape, y_train.dtype)
    print('Validation data shape: ', X_val.shape)
    print('Validation labels shape: ', y_val.shape)
    print('Test data shape: ', X_test.shape)
    print('Test labels shape: ', y_test.shape)

    device = '/cpu:0'

    with tf.device(device):
        x = tf.placeholder(tf.float32, [None, 32, 32, 3])
        y = tf.placeholder(tf.int32, [None])
        params = three_layer_convnet_init()
        scores = three_layer_convnet(x, params)
        loss = training_step(scores, y, params, learning_rate)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for t, (x_np, y_np) in enumerate(train_dset):
            feed_dict = {x: x_np, y: y_np}
            loss_np = sess.run(loss, feed_dict=feed_dict)

            if t % 100 == 0:
                print('Iteration %d, loss = %.4f' % (t, loss_np))
                check_accuracy(sess, val_dset, x, scores, is_training)

if __name__=="__main__":
    main()

编辑:删除了不必要的注释和代码

最佳答案

问题就在这里

h1_conv = tf.nn.conv2d(x, 
                       conv_w1 + conv_b1, 
                       strides=[1, 1, 1, 1], 
                       padding='SAME'
)

这是错误的,因为您在这里将偏差值 (conv_b1) 添加到过滤器 conv_w1,但必须将偏差添加到 conv 层的输出。正确的方法应该是这样的

h1_conv = tf.nn.conv2d(x, 
                       conv_w1, 
                       strides=[1, 1, 1, 1], 
                       padding='SAME'

)
h1_bias = tf.nn.bias_add(h1_conv, conv_b1)
h1 = tf.nn.relu(h1_bias)

也为 h2 更正它。

关于python - 带有 Tensorflow 的 CNN,CIFAR-10 的准确率低且没有改进,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51146597/

相关文章:

python - 订阅 IMU 传感器并监测方位值以确定汽车的行驶方向

python - 为什么这个 python 正则表达式不能编译?

tensorflow - 使用 Tensorflow 数据集创建 RLE(运行长度编码)掩码

python - 实现自定义层来生成二维指数图像

machine-learning - 我无法让 Caffe 工作

python - 使用 Python、win32api 和 Acrobat Reader 9 打印 PDF

python - 洗牌一个共同的第二个元素的所有列表

Windows 上的 Tensorflow Tensorboard 显示空白页面

python-3.x - 固定时间表的自适应学习率

neural-network - 用于逼近具有四个参数的函数的神经网络