我在 TensorFlow 中构建了一个 CNN,包含 2 个卷积层、1 个池化层和 2 个全连接(FC)层。不使用 dropout 时,训练集准确率为 98%,测试集准确率为 90%;而使用 dropout 时,训练集准确率只有 62%,测试集准确率为 83%。
数据集共有 25 个标签(类别),每个标签有 500–1200 个样本。
可能是什么问题呢?
更新1
建立网络
# Input geometry: side length of the (square) images and channel count.
image_size, num_channels = 32, 1

# Number of examples fed to the network per step.
batch_size = 50

# Convolution layers: kernel side length and number of kernels, per layer.
conv1_kernel_size, conv1_num_kernels = 3, 16
conv2_kernel_size, conv2_num_kernels = 3, 16

# Width of the hidden fully connected layer.
num_hidden = 64
with tf.Graph().as_default() as graph:
    # Input data. The training placeholders have a fixed batch dimension; the
    # whole test set is embedded in the graph as a constant.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_test_dataset = tf.constant(test_dataset)
    tf_test_single_data = tf.placeholder(
        tf.float32, shape=(1, image_size, image_size, num_channels))

    # Convolution parameters. tf.truncated_normal defaults to stddev=1.0, so
    # the stddev is set explicitly to the same 0.1 the FC layers use.
    conv1_weights = tf.Variable(
        tf.truncated_normal(
            [conv1_kernel_size, conv1_kernel_size,
             num_channels, conv1_num_kernels],
            stddev=0.1),
        name='conv1_weights')
    conv1_biases = tf.Variable(
        tf.zeros([conv1_num_kernels]), name='conv1_biases')
    conv2_weights = tf.Variable(
        tf.truncated_normal(
            [conv2_kernel_size, conv2_kernel_size,
             conv1_num_kernels, conv2_num_kernels],
            stddev=0.1),
        name='conv2_weights')
    conv2_biases = tf.Variable(
        tf.constant(1.0, shape=[conv2_num_kernels]), name='conv2_biases')

    # Both convolutions use stride 1 with 'SAME' padding, so only the single
    # 2x2 / stride-2 'SAME' max-pool shrinks the image: each side is halved,
    # rounding up.
    pooled_size = (image_size + 1) // 2
    fc1_weights = tf.Variable(
        tf.truncated_normal(
            [pooled_size * pooled_size * conv2_num_kernels, num_hidden],
            stddev=0.1),
        name='fc1_weights')
    fc1_biases = tf.Variable(
        tf.constant(1.0, shape=[num_hidden]), name='fc1_biases')
    # `name` must be passed by keyword: the second positional parameter of
    # tf.Variable is `trainable`, not `name`.
    fc2_weights = tf.Variable(
        tf.truncated_normal([num_hidden, num_labels], stddev=0.1),
        name='fc2_weights')
    fc2_biases = tf.Variable(
        tf.constant(1.0, shape=[num_labels]), name='fc2_biases')

    # Dropout keep probability. It has no default, so it must be fed on every
    # run that evaluates the model; feed 1.0 to disable dropout.
    keep_prob = tf.placeholder(tf.float32)

    def model(data):
        """Return the unnormalised class logits for a batch of images.

        Args:
            data: float tensor of shape
                (batch, image_size, image_size, num_channels).

        Returns:
            Tensor of shape (batch, num_labels) holding the logits
            (softmax is not applied here).
        """
        conv1 = tf.nn.conv2d(
            data, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        conv1_hidden = tf.nn.relu(conv1 + conv1_biases)
        conv2 = tf.nn.conv2d(
            conv1_hidden, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        conv2_hidden = tf.nn.relu(conv2 + conv2_biases)
        pool_conv2_hidden = tf.nn.max_pool(
            conv2_hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
            padding='SAME')

        # Flatten every non-batch dimension; -1 keeps the batch size of
        # whatever input was supplied.
        _, height, width, depth = pool_conv2_hidden.get_shape().as_list()
        fc1 = tf.reshape(pool_conv2_hidden, [-1, height * width * depth])
        fc1_hidden = tf.nn.relu(tf.matmul(fc1, fc1_weights) + fc1_biases)
        # Dropout is applied only to the hidden FC activations.
        fc1_drop_hidden = tf.nn.dropout(fc1_hidden, keep_prob)
        fc2 = tf.matmul(fc1_drop_hidden, fc2_weights) + fc2_biases
        return fc2

    # Training computation.
    logits = model(tf_train_dataset)
    loss_cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf_train_labels))
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(
        loss_cross_entropy)

    # Predictions. Both share the same variables and the same `keep_prob`.
    train_prediction = tf.nn.softmax(logits)
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
运行网络
num_steps = 20000
# Keep probability fed to the optimizer step. 1.0 disables dropout; lower it
# (e.g. 0.5) to train with dropout.
train_keep_prob = 1.0

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    # Batch start offsets wrap around within this range, so every slice below
    # holds exactly `batch_size` examples.
    offset_range = train_labels.shape[0] - batch_size
    for step in range(num_steps):
        offset = (step * batch_size) % offset_range
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]

        # Optimisation step: the only run in which dropout may be active.
        train_feed = {
            tf_train_dataset: batch_data,
            tf_train_labels: batch_labels,
            keep_prob: train_keep_prob,
        }
        session.run(optimizer, feed_dict=train_feed)

        if step % 50 == 0:
            # Metrics come from a separate pass with dropout disabled.
            # Fetching them in the same run as the optimizer would score the
            # randomly thinned network and under-report training accuracy
            # whenever train_keep_prob < 1.0. The values therefore reflect
            # the weights after this step's update.
            eval_feed = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels,
                keep_prob: 1.0,
            }
            loss, predictions = session.run(
                [loss_cross_entropy, train_prediction], feed_dict=eval_feed)
            train_acc = accuracy(predictions, batch_labels)
            epoch = (step * batch_size) // offset_range
            print('Epoch-%d - Minibatch loss at step %d: %f' % (epoch, step, loss))
            print('Epoch-%d - Minibatch train accuracy: %.1f%%' % (epoch, train_acc))

    # Final test-set evaluation, also with dropout disabled.
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(feed_dict={keep_prob : 1.0}), test_labels))