Recently I have been comparing VGG16 with a 20-layer ResNet v1 (ResNet20). I found that although each epoch takes longer to run on VGG, it usually needs fewer epochs than ResNet20 to reach a given training accuracy. Why does VGG learn faster? Is my experiment set up correctly? I have tried this on CIFAR-100 and on a subset of ImageNet (the Tiny ImageNet from the Stanford CV course). VGG has nearly 14M parameters, while ResNet20 has only about 0.3M.
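To double-check those counts, count_params() can be compared directly (a minimal sketch of my own, assuming TensorFlow 2.x / tf.keras; the VGG16 convolutional base alone already holds ~14.7M parameters):

    import tensorflow as tf

    # VGG16 without its fully connected head, sized for 32x32 CIFAR images.
    vgg = tf.keras.applications.VGG16(include_top=False, weights=None,
                                      input_shape=(32, 32, 3))
    print(vgg.count_params())  # ~14.7M; any dense head on top adds more

    # The same check applies to the ResNet20 model built below:
    # model.count_params() reports roughly 0.3M.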
Here is my ResNet implementation:
from tensorflow.keras.layers import (Input, Conv2D, BatchNormalization,
                                     Activation, AveragePooling2D, Flatten, Dense)
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

def resnet_layer1(inputs, initializer,
                  num_filters=16,
                  kernel_size=3,
                  strides=1,
                  activation='relu',
                  conv_first=True,
                  batch_normalization=True):
    # Conv -> BN -> Activation stack builder (the ResNet v1 building block).
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer=initializer)
    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        x = Activation(activation)(x)  # Activation(None) falls back to linear
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        x = Activation(activation)(x)
        x = conv(x)
    return x
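As a quick shape sanity check (my own snippet, not part of the training script; 'he_normal' is just a placeholder initializer):

    check_in = Input(shape=(32, 32, 3))
    check_out = resnet_layer1(check_in, initializer='he_normal')
    print(check_out.shape)  # (None, 32, 32, 16): 'same' padding, stride 1, 16 filters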
def resnet_1(model_number, x_train, y_train, x_test, y_test, datagen,
             initializer, epochs=20, bs=512, output_nodes=10, optim='adam',
             padding='same', dout=True, callbacks=None):
    depth = 20
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)
    # resnet_layer2/3/4 are other block variants of mine, defined elsewhere.
    if model_number == 1:
        resnet_layer = resnet_layer1
    elif model_number == 2:
        resnet_layer = resnet_layer2
    elif model_number == 3:
        resnet_layer = resnet_layer3
    elif model_number == 4:
        resnet_layer = resnet_layer4
    inputs = Input(shape=x_train.shape[1:])
    x = resnet_layer(inputs=inputs, initializer=initializer)
    # Instantiate the stack of residual units.
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample
            y = resnet_layer(inputs=x, initializer=initializer,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y, initializer=initializer,
                             num_filters=num_filters,
                             activation=None)
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # Linear projection residual shortcut connection to match
                # changed dims.
                x = resnet_layer(inputs=x, initializer=initializer,
                                 num_filters=num_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2
    # Add classifier on top.
    # v1 does not use BN after the last shortcut connection-ReLU.
    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(output_nodes,
                    activation='softmax',
                    kernel_initializer=initializer)(y)
    # Instantiate the model.
    model = Model(inputs=inputs, outputs=outputs)
    model.summary()
    # Compile the model.
    model.compile(loss='categorical_crossentropy', optimizer=optim,
                  metrics=['accuracy'])
    checkpointer = ModelCheckpoint(filepath=str(model_number) + '_weights.hdf5',
                                   verbose=1, save_weights_only=True,
                                   save_freq=2000000)
    callbacks = [] if callbacks is None else list(callbacks)  # avoid appending to None
    callbacks.append(checkpointer)
    # model.fit handles generators in TF 2.x (fit_generator is deprecated).
    if x_test is None:  # the original `x_test.all() == None` never detects a missing array
        history = model.fit(datagen.flow(x_train, y_train, batch_size=bs),
                            callbacks=callbacks, epochs=epochs,
                            steps_per_epoch=x_train.shape[0] // bs)
    else:
        history = model.fit(datagen.flow(x_train, y_train, batch_size=bs),
                            callbacks=callbacks, epochs=epochs,
                            validation_data=(x_test, y_test),
                            steps_per_epoch=x_train.shape[0] // bs)
    return history, model
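For reference, this is roughly how the function can be driven on CIFAR-100 (a hedged sketch: the augmentation settings and the 'he_normal' initializer are my assumptions, and model_number=1 sticks to resnet_layer1 since the other variants are not shown here):

    from tensorflow.keras.datasets import cifar100
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from tensorflow.keras.utils import to_categorical

    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    y_train = to_categorical(y_train, 100)  # one-hot for categorical_crossentropy
    y_test = to_categorical(y_test, 100)

    # Assumed augmentation; any ImageDataGenerator works here.
    datagen = ImageDataGenerator(width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 horizontal_flip=True)

    history, model = resnet_1(1, x_train, y_train, x_test, y_test, datagen,
                              initializer='he_normal', epochs=20, bs=512,
                              output_nodes=100, callbacks=[])
    print(model.count_params())  # ~0.3M, matching the count quoted above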