关于改进模型的任何想法。到目前为止,我能够在多任务学习架构的每个任务特定密集网络上实现大约 0.20 的准确度。我已经发布了模型和验证准确度准确度和模型和验证损失损失。目前从图表中,我可以看到没有任何学习发生。以下是问题的完整描述。
问题描述
我正在研究一个分类问题。数据集是从 kaggle 主办的比赛中收集的,Painters by number。任务是识别给定绘画的画家、风格和流派。到目前为止,我训练了个人模型来预测给定绘画的画家、风格和流派。现在我想结合多任务学习(即)开发一个可以预测所有三个任务的模型。我想出了 多任务学习架构指定的架构
input_layer = Input(shape=(64,64,3))
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(input_layer)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
# Block 2
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
# Block 3
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
# Block 4
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
# Block 5
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
x = Flatten()(x)
x = Dense(100)(x)
out_style_1 = Dense(
100,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation='relu'
)(x)
out_genre_1 = Dense(
100,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation='relu'
)(x)
out_painter_1 = Dense(
100,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation='relu'
)(x)
out_style_2 = Dense(
19,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation = 'softmax',
)(out_style_1)
out_genre_2 = Dense(
32,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation = 'softmax',
)(out_genre_1)
out_painter_2 = Dense(
10,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation = 'softmax',
)(out_painter_1)
multi_tasking_model = Model(inputs=[input_layer],outputs=[out_style_2,out_genre_2,out_painter_2])
multi_tasking_model.summary()
multi_tasking_model.compile(
loss = 'categorical_crossentropy',
optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=0.00000001),
metrics=['accuracy']
)
history = multi_tasking_model.fit_generator(
generator = train_generator,
steps_per_epoch= 2920//50,
epochs = 100,
validation_data = valid_generator,
validation_steps = 690//50
)
数据提供者
使用 keras 数据提供程序来完成这项任务并没有帮助。所以我创建了一个自定义数据提供者。参考(斯坦福数据提供者)
import numpy as np
class DataGenerator(object):
def __init__(self,dim_x = 64,dim_y=32,dim_z=32,batch_size=32,
shuffle=True,data=None,style=None,genre=None,painter=None):
self.dim_x = dim_x
self.dim_y = dim_y
self.dim_z = dim_z
self.batch_size = batch_size
self.shuffle=shuffle
self.data = data
self.y_style = style
self.y_genre = genre
self.y_painter = painter
@property
def data(self):
return self._data
@data.setter
def data(self,value):
self._data = value
@property
def y_style(self):
return self._y_style
@y_style.setter
def y_style(self,value):
self._y_style=value
@property
def y_genre(self):
return self._y_genre
@y_genre.setter
def y_genre(self,value):
self._y_genre = value
@property
def y_painter(self):
return self._y_painter
@y_painter.setter
def y_painter(self,value):
self._y_painter = value
def __get_exploration_order(self,len_list_ids):
indexes = np.arange(len_list_ids)
if self.shuffle:
np.random.shuffle(indexes)
return indexes
def __data_generation(self,list_ids_temp):
X = np.empty((self.batch_size,self.dim_x,self.dim_y,self.dim_z))
y_style = np.empty((self.batch_size), dtype=int)
y_genre = np.empty((self.batch_size), dtype=int)
y_painter = np.empty((self.batch_size), dtype=int)
for i, ID in enumerate(list_ids_temp):
X[i,:,:,:] = self.data[ID]
y_style[i] = self.y_style[ID]
y_genre[i] = self.y_genre[ID]
y_painter[i] = self.y_painter[ID]
return X,[y_style,y_genre,y_painter]
def generate(self,len_list_ids):
while 1:
indexes = self.__get_exploration_order(len_list_ids)
imax = int(len(indexes)/self.batch_size)
for i in range(imax):
list_ids_temp = [ k for k in indexes[i*self.batch_size:(i+1)*self.batch_size]]
X,y = self.__data_generation(list_ids_temp)
yield X,y
数据提供者初始化
from DataGenerator import DataGenerator
params = {
'dim_x': 224,
'dim_y': 224,
'dim_z': 3,
'batch_size':10,
'shuffle':True
}
data_train_gen = DataGenerator(**params)
data_train_gen.data = np.load('data.npy')
data_train_gen.y_style = np.load('y_style.npy')
data_train_gen.y_genre = np.load('y_genre.npy')
data_train_gen.y_painter = np.load('y_painter.npy')
train_generator=data_train_gen.generate(data_train_gen.data.shape[0])
data_gen_valid = DataGenerator(**params)
data_gen_valid.data = np.load('data_valid.npy')
data_gen_valid.y_style = np.load('y_style_valid.npy')
data_gen_valid.y_genre = np.load('y_genre_valid.npy')
data_gen_valid.y_painter = np.load('y_painter_valid.npy')
valid_generator= data_gen_valid.generate(data_gen_valid.data.shape[0])
模型摘要
Layer (type) Output Shape Param # Connected to
input_4 (InputLayer) (None, 64, 64, 3) 0
block1_conv1 (Conv2D) (None, 64, 64, 64) 1792 input_3[0][0]
block1_conv2 (Conv2D) (None, 64, 64, 64) 36928 block1_conv1[0][0]
block1_pool (MaxPooling2D) (None, 32, 32, 64) 0 block1_conv2[0][0]
block2_conv1 (Conv2D) (None, 32, 32, 128) 73856 block1_pool[0][0]
block2_conv2 (Conv2D) (None, 32, 32, 128) 147584 block2_conv1[0][0]
block2_pool (MaxPooling2D) (None, 16, 16, 128) 0 block2_conv2[0][0]
block3_conv1 (Conv2D) (None, 16, 16, 256) 295168 block2_pool[0][0]
block3_conv2 (Conv2D) (None, 16, 16, 256) 590080 block3_conv1[0][0]
block3_conv3 (Conv2D) (None, 16, 16, 256) 590080 block3_conv2[0][0]
block3_pool (MaxPooling2D) (None, 8, 8, 256) 0 block3_conv3[0][0]
block4_conv1 (Conv2D) (None, 8, 8, 512) 1180160 block3_pool[0][0]
block4_conv2 (Conv2D) (None, 8, 8, 512) 2359808 block4_conv1[0][0]
block4_conv3 (Conv2D) (None, 8, 8, 512) 2359808 block4_conv2[0][0]
block4_pool (MaxPooling2D) (None, 4, 4, 512) 0 block4_conv3[0][0]
block5_conv1 (Conv2D) (None, 4, 4, 512) 2359808 block4_pool[0][0]
block5_conv2 (Conv2D) (None, 4, 4, 512) 2359808 block5_conv1[0][0]
block5_conv3 (Conv2D) (None, 4, 4, 512) 2359808 block5_conv2[0][0]
block5_pool (MaxPooling2D) (None, 2, 2, 512) 0 block5_conv3[0][0]
flatten_3 (Flatten) (None, 2048) 0 block5_pool[0][0]
dense_15 (Dense) (None, 100) 204900 flatten_3[0][0]
dense_16 (Dense) (None, 100) 10100 dense_15[0][0]
dense_17 (Dense) (None, 100) 10100 dense_15[0][0]
dense_18 (Dense) (None, 100) 10100 dense_15[0][0]
dense_19 (Dense) (None, 19) 1919 dense_16[0][0]
dense_20 (Dense) (None, 32) 3232 dense_17[0][0]
dense_21 (Dense) (None, 10) 1010 dense_18[0][0]
资料说明
Data(train) shape
-------------- --------
data_train.npy (2920,224,244,3)
y_style_train.npy (2920, 19)
y_genre_train.npy (2920, 32)
y_painter_train.npy (2920,10)
Data(valid) shape
-------------- --------
data_valid.npy (690,224,244,3)
y_style_valid.npy (690, 19)
y_genre_valid.npy (690, 32)
y_painter_valid.npy (690,10)
我相信我已经发布了所有可用的信息。任何提示都会非常有帮助