In my VGG16 fine-tuning I have to classify retinal images into 2 classes (stage 4 or not stage 4), with 700 images per class to train on. This is my current code:
import os

import cv2
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, Reshape
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator

TRAIN_DIR = 'train/'
TEST_DIR = 'test/'
v = 'v/'
BATCH_SIZE = 32
NUM_EPOCHS = 5
def ReadImages(Path):
    LabelList = list()
    ImageCV = list()
    classes = ["nonPdr", "pdr"]

    # Get all subdirectories
    FolderList = [f for f in os.listdir(Path) if not f.startswith('.')]

    # Loop over each directory
    for File in FolderList:
        for index, Image in enumerate(os.listdir(os.path.join(Path, File))):
            # Read the image, resize it to 224x224 and store its class label
            ImageCV.append(cv2.resize(cv2.imread(os.path.join(Path, File) + os.path.sep + Image), (224,224)))
            #ImageCV[index]= np.array(ImageCV[index]) / 255.0
            LabelList.append(classes.index(os.path.splitext(File)[0]))

            #ImageCV[index] = cv2.addWeighted(ImageCV[index],4, cv2.GaussianBlur(ImageCV[index],(0,0), 10), -4, 128)
            # Contrast enhancement: subtract a Gaussian-blurred copy of the image.
            # Use ImageCV[-1] (the image just appended), since `index` restarts for every folder.
            image_blurred = cv2.GaussianBlur(ImageCV[-1], (0, 0), 100 / 30)
            ImageCV[-1] = cv2.addWeighted(ImageCV[-1], 4, image_blurred, -4, 128)

    return ImageCV, LabelList
data, labels = ReadImages(TRAIN_DIR)
valid, vlabels = ReadImages(TEST_DIR)
vgg16_model = VGG16(weights="imagenet", include_top=True)
# (1) visualize layers
print("VGG16 model layers")
for i, layer in enumerate(vgg16_model.layers):
    print(i, layer.name, layer.output_shape)
# (2) remove the top layer
base_model = Model(input=vgg16_model.input,
                   output=vgg16_model.get_layer("block5_pool").output)
# (3) attach a new top layer
base_out = base_model.output
base_out = Reshape((25088,))(base_out)
top_fc1 = Dropout(0.5)(base_out)
# output layer: (None, 1), sigmoid probability of the "pdr" class
top_preds = Dense(1, activation="sigmoid")(top_fc1)
# (4) freeze weights until the last but one convolution layer (block4_pool)
for layer in base_model.layers[0:14]:
    layer.trainable = False
# (5) create new hybrid model
model = Model(input=base_model.input, output=top_preds)
# (6) compile and train the model
sgd = SGD(lr=1e-4, momentum=0.9)
model.compile(optimizer=sgd, loss="binary_crossentropy", metrics=["accuracy"])
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)
# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(data)
es = EarlyStopping(monitor='val_loss', verbose=1)
# fits the model on batches with real-time data augmentation:
model.fit_generator(datagen.flow(np.array(data), np.array(labels), batch_size=32),
                    steps_per_epoch=len(np.array(data)) / 32, epochs=15,
                    validation_data=(np.array(valid), np.array(vlabels)),
                    nb_val_samples=72, callbacks=[es])
model.save('model.h5')
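One thing I am not sure about: datagen.fit(data) learns featurewise mean/std that are applied to the training batches, but the validation arrays passed to fit_generator go in raw. A rough sketch of what I think would keep the two consistent (a second generator that only standardizes, with statistics taken from the training set):

# Sketch: push the validation data through the same featurewise normalization as training.
# val_datagen does no augmentation; its statistics come from the training set.
val_datagen = ImageDataGenerator(featurewise_center=True,
                                 featurewise_std_normalization=True)
val_datagen.fit(np.array(data))

model.fit_generator(datagen.flow(np.array(data), np.array(labels), batch_size=BATCH_SIZE),
                    steps_per_epoch=len(data) // BATCH_SIZE, epochs=15,
                    validation_data=val_datagen.flow(np.array(valid), np.array(vlabels),
                                                     batch_size=BATCH_SIZE),
                    validation_steps=len(valid) // BATCH_SIZE,
                    callbacks=[es])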
The model reaches an accuracy of 1.00 within 4 epochs with minimal loss (~0.01), but when I run predict.py on some test images the real accuracy is about 50% (out of 10 images the application only gets 5 right). Here is my predict.py:
import os

import cv2
import numpy as np
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model
from keras.preprocessing import image

TEST_DIR = 'test/'

model = load_model('model.h5')

for filename in os.listdir(r'v/'):
    if filename.endswith(".jpg") or filename.endswith(".ppm") or filename.endswith(".jpeg") or filename.endswith(".png"):
        ImageCV = cv2.resize(cv2.imread(os.path.join(TEST_DIR) + filename), (224,224))
        #ImageCV = cv2.addWeighted(ImageCV,4, cv2.GaussianBlur(ImageCV,(0,0), 224/25), -4, 120)
        #ImageCV = ImageCV.reshape(-1,224,224,3)

        x = image.img_to_array(ImageCV)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        #print(model.predict(x))
        prob = model.predict(x)
        if prob < 0.5:
            print("nonPDR")
        else:
            print("PDR")
        print(filename)
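I also wonder whether preprocess_input here matches what the network saw during training (featurewise centering/std normalization from the ImageDataGenerator). A minimal sketch of reusing the fitted generator's statistics at prediction time instead, assuming the datagen from train.py has already been fitted on the training data:

# Sketch: standardize a test image with the training generator's featurewise statistics
# (assumes `datagen` was fitted with datagen.fit(data), as in train.py).
img = cv2.resize(cv2.imread(os.path.join(TEST_DIR) + filename), (224, 224))
img = img.astype('float32')
img = datagen.standardize(img)   # same mean/std as the training batches
x = np.expand_dims(img, axis=0)
prob = model.predict(x)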
Things I have already tried:

- changing the LR
- putting a Dense(256) layer before the dropout
- increasing the dropout to 0.75
- changing some of the data augmentation parameters
The input will be images like this one: https://i.imgur.com/DsU06Xv.jpg

To classify them better, I enhance the images in my ReadImages method: https://i.imgur.com/YBzaBgw.png

So, what can I do to really improve my prediction accuracy, and not just the training loss and accuracy values?

Am I doing something wrong?
UPDATE

I have dropped the GaussianBlur I was using in ReadImages and added the following:
data = np.asarray(data)
valid = np.asarray(valid)
data = data.astype('float32')
valid = valid.astype('float32')
data /= 255
valid /= 255
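(For reference, per-channel mean/std over the rescaled training array can be obtained like this; values of this kind are used in the updated predict.py further down:)

# Per-channel mean and std of the rescaled (0..1) training images,
# computed over all images and all pixels.
channel_mean = data.mean(axis=(0, 1, 2))
channel_std = data.std(axis=(0, 1, 2))
print(channel_mean, channel_std)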
After running my train.py I get:
Epoch 1/15
44/43 [==============================] - 476s 11s/step - loss: 0.7153 - acc: 0.5788 - val_loss: 0.6937 - val_acc: 0.5556
Epoch 2/15
44/43 [==============================] - 468s 11s/step - loss: 0.5526 - acc: 0.7275 - val_loss: 0.6838 - val_acc: 0.5833
Epoch 3/15
44/43 [==============================] - 474s 11s/step - loss: 0.5068 - acc: 0.7595 - val_loss: 0.6927 - val_acc: 0.5694
Epoch 00003: early stopping
After that, I updated predict.py to apply the mean and std normalization:
pdr = 0
nonPdr = 0

for filename in os.listdir(r'v/'):
    if filename.endswith(".jpg") or filename.endswith(".ppm") or filename.endswith(".jpeg") or filename.endswith(".png"):
        ImageCV = cv2.resize(cv2.imread(os.path.join(TEST_DIR) + filename), (224,224))

        ImageCV = np.asarray(ImageCV)
        ImageCV = ImageCV.astype('float32')
        ImageCV /= 255
        x = ImageCV
        x = np.expand_dims(x, axis=0)
        x = normalize(x, [0.12810835, 0.17897758, 0.23883381], [0.14304605, 0.18229756, 0.2362126])

        prob = model.predict(x)
        if prob <= 0.50:
            print("nonPDR >>>", filename)
            nonPdr += 1
        else:
            print("PDR >>>", filename)
            pdr += 1
        print(prob)

print("Number of retinas with PDR: ", pdr)
print("Number of retinas without PDR: ", nonPdr)
After running this code, I get roughly 75% accuracy on my test directory.

So, is there anything I can still improve, or is this about the best that can be achieved with this small number of images?