Thoughts on improving a multi-task learning model

data-mining  keras  multi-task-learning
2021-10-08 19:46:04

Any ideas for improving the model? So far I have only managed about 0.20 accuracy on each task-specific dense head of the multi-task learning architecture. I have posted the model together with the training/validation accuracy and loss curves. From the plots I can see that essentially no learning is taking place. A full description of the problem follows.

Problem description

I am working on a classification problem. The dataset is from the Kaggle-hosted competition Painter by Numbers; the task is to identify the painter, style, and genre of a given painting. So far I have trained individual models to predict the painter, style, and genre separately. Now I want to combine them with multi-task learning, i.e. develop one model that can predict all three tasks. I came up with the architecture given below.

 # Imports assumed by this snippet (Keras 2.x API, matching the lr= and
 # fit_generator usage below); seed_val was not shown in the post, so any
 # fixed integer seed will do.
 from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
 from keras.models import Model
 from keras.optimizers import Adam
 from keras.initializers import glorot_normal
 from keras.regularizers import l2

 seed_val = 42  # assumption: the original seed value was not posted

 input_layer = Input(shape=(64, 64, 3))

 x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(input_layer)
 x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
 x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

 # Block 2
 x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
 x = Conv2D(128, (3, 3), activation='relu', padding='same',   name='block2_conv2')(x)
 x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

 # Block 3
 x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
 x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
 x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
 x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

 # Block 4
 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
 x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

 # Block 5
 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
 x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

 x = Flatten()(x)
 x = Dense(100)(x)  # note: no activation here, so this shared layer is purely linear

 out_style_1 = Dense(
         100,
         kernel_initializer=glorot_normal(seed=seed_val),
         bias_initializer = 'zero',
         kernel_regularizer = l2(l=0.0001),
         activation='relu'
     )(x)

 out_genre_1 = Dense(
         100,
         kernel_initializer=glorot_normal(seed=seed_val),
         bias_initializer = 'zero',
         kernel_regularizer = l2(l=0.0001),
         activation='relu'
     )(x)

 out_painter_1 = Dense(
         100,
         kernel_initializer=glorot_normal(seed=seed_val),
         bias_initializer = 'zero',
         kernel_regularizer = l2(l=0.0001),
         activation='relu'
     )(x)

 out_style_2 = Dense(
         19,
         kernel_initializer=glorot_normal(seed=seed_val),
         bias_initializer = 'zero',
         kernel_regularizer = l2(l=0.0001),
         activation = 'softmax',
     )(out_style_1)

 out_genre_2 = Dense(
         32,
         kernel_initializer=glorot_normal(seed=seed_val),
         bias_initializer = 'zero', 
         kernel_regularizer = l2(l=0.0001),
         activation = 'softmax',
     )(out_genre_1)

 out_painter_2 = Dense(
         10,
         kernel_initializer=glorot_normal(seed=seed_val),
         bias_initializer = 'zero',
         kernel_regularizer = l2(l=0.0001),
         activation = 'softmax',
     )(out_painter_1)

 multi_tasking_model = Model(inputs=[input_layer], outputs=[out_style_2, out_genre_2, out_painter_2])
 multi_tasking_model.summary()


 multi_tasking_model.compile(
     loss='categorical_crossentropy',
     optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-8),
     metrics=['accuracy']
 )


 # Note: steps_per_epoch = 2920 // 50 assumes a batch size of 50, but the
 # generators below are constructed with batch_size=10; the two should agree.
 history = multi_tasking_model.fit_generator(
     generator=train_generator,
     steps_per_epoch=2920 // 50,
     epochs=100,
     validation_data=valid_generator,
     validation_steps=690 // 50
 )
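
For reference, Keras also accepts per-output losses and loss weights once the output layers are named, which makes it easy to rebalance the three tasks. A minimal sketch; the name= arguments and the weight values are illustrative assumptions, not from the original model:

 # Hypothetical variant with named outputs and weighted task losses
 out_style_2 = Dense(19, activation='softmax', name='style')(out_style_1)
 out_genre_2 = Dense(32, activation='softmax', name='genre')(out_genre_1)
 out_painter_2 = Dense(10, activation='softmax', name='painter')(out_painter_1)

 multi_tasking_model = Model(inputs=[input_layer],
                             outputs=[out_style_2, out_genre_2, out_painter_2])
 multi_tasking_model.compile(
     loss={'style': 'categorical_crossentropy',
           'genre': 'categorical_crossentropy',
           'painter': 'categorical_crossentropy'},
     loss_weights={'style': 1.0, 'genre': 1.0, 'painter': 1.0},  # assumed equal weights
     optimizer=Adam(lr=0.0001),
     metrics=['accuracy']
 )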

Data generator

The built-in Keras data generator did not help for this task, so I created a custom data generator, following the Stanford data-generator example.

import numpy as np


class DataGenerator(object):

    def __init__(self, dim_x=64, dim_y=32, dim_z=32, batch_size=32,
                 shuffle=True, data=None, style=None, genre=None, painter=None):
        self.dim_x = dim_x
        self.dim_y = dim_y
        self.dim_z = dim_z
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.data = data
        self.y_style = style
        self.y_genre = genre
        self.y_painter = painter

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, value):
        self._data = value

    @property
    def y_style(self):
        return self._y_style

    @y_style.setter
    def y_style(self, value):
        self._y_style = value

    @property
    def y_genre(self):
        return self._y_genre

    @y_genre.setter
    def y_genre(self, value):
        self._y_genre = value

    @property
    def y_painter(self):
        return self._y_painter

    @y_painter.setter
    def y_painter(self, value):
        self._y_painter = value

    def __get_exploration_order(self, len_list_ids):
        # Index order over the whole dataset, reshuffled once per epoch.
        indexes = np.arange(len_list_ids)
        if self.shuffle:
            np.random.shuffle(indexes)
        return indexes

    def __data_generation(self, list_ids_temp):
        X = np.empty((self.batch_size, self.dim_x, self.dim_y, self.dim_z))
        # The labels are one-hot encoded (see the data description below),
        # so each batch row must hold a full label vector, not a single int.
        y_style = np.empty((self.batch_size,) + self.y_style.shape[1:])
        y_genre = np.empty((self.batch_size,) + self.y_genre.shape[1:])
        y_painter = np.empty((self.batch_size,) + self.y_painter.shape[1:])

        for i, ID in enumerate(list_ids_temp):
            X[i, :, :, :] = self.data[ID]
            y_style[i] = self.y_style[ID]
            y_genre[i] = self.y_genre[ID]
            y_painter[i] = self.y_painter[ID]

        return X, [y_style, y_genre, y_painter]

    def generate(self, len_list_ids):
        # Infinite generator: yields one (X, [y_style, y_genre, y_painter])
        # batch per step, as required by fit_generator.
        while True:
            indexes = self.__get_exploration_order(len_list_ids)
            imax = int(len(indexes) / self.batch_size)
            for i in range(imax):
                list_ids_temp = list(indexes[i * self.batch_size:(i + 1) * self.batch_size])
                X, y = self.__data_generation(list_ids_temp)
                yield X, y

Data generator initialization

 from DataGenerator import DataGenerator

 # Note: dim_x/dim_y here are 224, but the model above was built with
 # Input(shape=(64, 64, 3)); these need to agree.
 params = {
     'dim_x': 224,
     'dim_y': 224,
     'dim_z': 3,
     'batch_size': 10,
     'shuffle': True
 }

 data_train_gen = DataGenerator(**params)
 data_train_gen.data = np.load('data.npy')
 data_train_gen.y_style = np.load('y_style.npy')
 data_train_gen.y_genre = np.load('y_genre.npy')
 data_train_gen.y_painter = np.load('y_painter.npy')
 train_generator=data_train_gen.generate(data_train_gen.data.shape[0])


 data_gen_valid = DataGenerator(**params)
 data_gen_valid.data = np.load('data_valid.npy')
 data_gen_valid.y_style = np.load('y_style_valid.npy')
 data_gen_valid.y_genre = np.load('y_genre_valid.npy')
 data_gen_valid.y_painter = np.load('y_painter_valid.npy')
 valid_generator= data_gen_valid.generate(data_gen_valid.data.shape[0])
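
A quick sanity check (my own, not from the referenced example) to confirm that the generator yields the expected batch shapes:

 # One batch from the infinite generator; the expected shapes assume
 # batch_size=10 and the one-hot label dimensions listed below.
 X, (y_s, y_g, y_p) = next(train_generator)
 print(X.shape, y_s.shape, y_g.shape, y_p.shape)
 # expected: (10, 224, 224, 3) (10, 19) (10, 32) (10, 10)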

Model summary

   Layer (type)                    Output Shape         Param #     Connected to
   input_4 (InputLayer)         (None, 64, 64, 3)         0
   block1_conv1 (Conv2D)           (None, 64, 64, 64)   1792        input_3[0][0]
   block1_conv2 (Conv2D)           (None, 64, 64, 64)   36928       block1_conv1[0][0]
   block1_pool (MaxPooling2D)      (None, 32, 32, 64)   0           block1_conv2[0][0]
   block2_conv1 (Conv2D)           (None, 32, 32, 128)  73856       block1_pool[0][0]
   block2_conv2 (Conv2D)           (None, 32, 32, 128)  147584      block2_conv1[0][0]
   block2_pool (MaxPooling2D)      (None, 16, 16, 128)  0           block2_conv2[0][0]
   block3_conv1 (Conv2D)           (None, 16, 16, 256)  295168      block2_pool[0][0]
   block3_conv2 (Conv2D)           (None, 16, 16, 256)  590080      block3_conv1[0][0]
   block3_conv3 (Conv2D)           (None, 16, 16, 256)  590080      block3_conv2[0][0]
   block3_pool (MaxPooling2D)      (None, 8, 8, 256)    0           block3_conv3[0][0]
   block4_conv1 (Conv2D)           (None, 8, 8, 512)    1180160     block3_pool[0][0]
   block4_conv2 (Conv2D)           (None, 8, 8, 512)    2359808     block4_conv1[0][0]
   block4_conv3 (Conv2D)           (None, 8, 8, 512)    2359808     block4_conv2[0][0]
   block4_pool (MaxPooling2D)      (None, 4, 4, 512)    0           block4_conv3[0][0]
   block5_conv1 (Conv2D)           (None, 4, 4, 512)    2359808     block4_pool[0][0]
   block5_conv2 (Conv2D)           (None, 4, 4, 512)    2359808     block5_conv1[0][0]
   block5_conv3 (Conv2D)           (None, 4, 4, 512)    2359808     block5_conv2[0][0]
   block5_pool (MaxPooling2D)      (None, 2, 2, 512)    0           block5_conv3[0][0]
   flatten_3 (Flatten)             (None, 2048)         0           block5_pool[0][0]
   dense_15 (Dense)                (None, 100)          204900      flatten_3[0][0]
   dense_16 (Dense)                (None, 100)          10100       dense_15[0][0]
   dense_17 (Dense)                (None, 100)          10100       dense_15[0][0]
   dense_18 (Dense)                (None, 100)          10100       dense_15[0][0]
   dense_19 (Dense)                (None, 19)           1919        dense_16[0][0]
   dense_20 (Dense)                (None, 32)           3232        dense_17[0][0]
   dense_21 (Dense)                (None, 10)           1010        dense_18[0][0]

Data description

   Data(train)                shape
   --------------            --------
   data_train.npy            (2920, 224, 224, 3)
   y_style_train.npy         (2920, 19)
   y_genre_train.npy         (2920, 32)
   y_painter_train.npy       (2920,10)

   Data(valid)                shape
   --------------            --------
   data_valid.npy            (690, 224, 224, 3)
   y_style_valid.npy         (690, 19)
   y_genre_valid.npy         (690, 32)
   y_painter_valid.npy       (690,10)

I believe I have posted all the information available. Any hints would be very helpful.

1 Answer

Tune the optimizer hyperparameters:

1) Use amsgrad=True.
2) Try lowering beta_1.
3) Sweep the lr from 1e-6 to 0.1, or use a learning-rate finder.
4) Use weight decay in the optimizer (a learning-rate schedule), or add a callback that adjusts the learning rate during training.

Tune the parameters here: optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=0.00000001)
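
A minimal sketch of suggestions 1), 2) and 4) applied to the model from the question; the beta_1 value and the ReduceLROnPlateau settings are illustrative assumptions, not tuned values:

 from keras.callbacks import ReduceLROnPlateau
 from keras.optimizers import Adam

 # 1) amsgrad on; 2) lower beta_1 (0.8 is just an example value)
 optimizer = Adam(lr=0.0001, beta_1=0.8, beta_2=0.999, epsilon=1e-8, amsgrad=True)

 # 4) halve the learning rate whenever validation loss stalls
 reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

 multi_tasking_model.compile(loss='categorical_crossentropy',
                             optimizer=optimizer, metrics=['accuracy'])

 history = multi_tasking_model.fit_generator(
     generator=train_generator,
     steps_per_epoch=2920 // 50,
     epochs=100,
     validation_data=valid_generator,
     validation_steps=690 // 50,
     callbacks=[reduce_lr]
 )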

As mentioned above, dropout and batch normalization can also help. I would also suggest using LeakyReLU or PReLU in place of the ReLU activations; see the sketch after the next suggestion.

Also, initialize the conv weights with the 'he_uniform' initializer instead of the default.
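
A minimal sketch of one conv block rewritten along these lines; the dropout rate and LeakyReLU alpha are illustrative assumptions, not tuned values:

 from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, LeakyReLU

 # VGG-style block 1 with the suggested changes: he_uniform initialization,
 # batch norm before the activation, LeakyReLU instead of ReLU, and dropout.
 x = Conv2D(64, (3, 3), padding='same', kernel_initializer='he_uniform',
            name='block1_conv1')(input_layer)
 x = BatchNormalization()(x)
 x = LeakyReLU(alpha=0.1)(x)
 x = Conv2D(64, (3, 3), padding='same', kernel_initializer='he_uniform',
            name='block1_conv2')(x)
 x = BatchNormalization()(x)
 x = LeakyReLU(alpha=0.1)(x)
 x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
 x = Dropout(0.25)(x)  # illustrative rate; tune per block

The same pattern would be repeated for blocks 2-5.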