我正在使用以 TensorFlow 为后端的 Keras 做二分类。无论怎样调整,模型的 val_acc 始终无法超过 67%。是我遗漏了什么,还是说这已经是我的数据所能达到的最高准确率?
我的代码
加载数据集,将两个类别按 1:1 的比例平衡,并划分出验证数据。
from sklearn.utils import resample
# Load the raw data set. The 'RESULT' column (third column) is compared against
# 0 and 1 below and is used as the binary target.
raw_data = pd.read_csv('Data.csv')

# Split by class, keeping the first three columns and discarding incomplete rows.
# NOTE(review): `.iloc[1:-2]` also drops the first row and the last two rows of
# each class subset -- confirm that this is intentional.
df_majority = raw_data[raw_data['RESULT'] == 0].iloc[1:-2, 0:3].dropna()
df_minority = raw_data[raw_data['RESULT'] == 1].iloc[1:-2, 0:3].dropna()
print(raw_data['RESULT'].value_counts())

# Downsample the majority class to the size of the *filtered* minority class.
# Using the raw class count here would ignore the rows removed above and leave
# the two classes unbalanced.
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=123,
)

# Combine minority class with downsampled majority class
df_downsampled = pd.concat([df_majority_downsampled, df_minority])

# Display new class counts
print(df_downsampled['RESULT'].value_counts())
print(numpy.unique(df_downsampled['RESULT']))

# Select the rows once so that features and target always stay row-aligned.
# NOTE(review): this second `.iloc[1:-2]` removes one more row from the start
# and two from the end of the combined frame -- confirm that this is wanted.
df_model = df_downsampled.iloc[1:-2].dropna()
X = df_model.iloc[:, 0:2]  # first two columns: input features
Y = df_model.iloc[:, 2:3]  # third column ('RESULT'): target, kept as a DataFrame

# Hold out 30% of the balanced data as validation data.
X, XTest, Y, YTest = train_test_split(X, Y, test_size=0.3, random_state=0)

# Double-check the class counts in the validation split.
print(YTest['RESULT'].value_counts())
创建模型
def create_model(activation, weights_path="weights.best.hdf5"):
    """Build and compile the fully connected binary classifier.

    The network takes 2 input features and stacks Dense layers of
    128, 64, 32, 16, 8, 4 and 2 units, each followed by batch normalization
    (and, from the second layer on, 20% dropout), ending in a single sigmoid
    output unit.

    Args:
        activation: Activation used by every hidden Dense layer.
        weights_path: Checkpoint file to restore weights from. The weights
            are loaded only when this file exists, so a first run without a
            checkpoint starts from freshly initialised weights instead of
            failing. Pass ``None`` to always skip loading.

    Returns:
        The compiled model (binary cross-entropy loss, Adam optimizer with
        a learning rate of 0.0001, accuracy metric).
    """
    import os

    model = Sequential()

    # Input layer: no dropout directly after the first hidden layer.
    model.add(Dense(128, activation=activation, input_dim=2))
    model.add(BatchNormalization())

    # Remaining hidden layers, halving in width each time.
    for units in (64, 32, 16, 8, 4, 2):
        model.add(Dense(units, activation=activation))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))

    model.add(Dense(1, activation='sigmoid'))

    # Resume from a previous checkpoint only if one is actually present.
    # NOTE(review): a checkpoint saved from a different architecture will
    # presumably still make load_weights fail -- delete stale files when the
    # layer layout changes.
    if weights_path and os.path.isfile(weights_path):
        model.load_weights(weights_path)

    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=0.0001),
        metrics=['accuracy'],
    )
    return model
# Checkpoint file that receives the weights of the best epoch.
filepath = "weights.best.hdf5"

# Build the network with ReLU activations in the hidden layers.
model = create_model('relu')

# Save the weights only when the monitored validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)
callbacks_list = [checkpoint]

# Train silently for 2000 epochs, validating on the held-out split each epoch.
history = model.fit(
    X,
    Y,
    epochs=2000,
    batch_size=32,
    shuffle=True,
    validation_data=(XTest, YTest),
    verbose=0,
    callbacks=callbacks_list,
)
预测并获得分数
from sklearn.metrics import roc_auc_score
# Sigmoid outputs for the training inputs, flattened to one value per sample.
# NOTE(review): X is the training split here; use XTest/YTest to score the
# held-out data instead.
probabilities = model.predict(X).ravel()

# Hard 0/1 labels obtained by thresholding the sigmoid output at 0.5.
class_predictions = (probabilities > 0.5).astype('int32')
print(numpy.unique(class_predictions))

# Debug aid (disabled): print every prediction next to its label and inputs.
# for index, val in enumerate(class_predictions):
#     print("Predicted: %s, actual: %s, for val %s" % (val, Y.iloc[index].values, X.iloc[index].values))

# ROC AUC needs continuous scores; feeding it hard class labels collapses the
# curve to a single operating point.
print("ras score: ", roc_auc_score(Y, probabilities))

# Score a single hand-written sample with two feature values.
predict = model.predict(numpy.array([0.0235, 0.5]).reshape(-1, 2))
print(predict[0][0])
使用当前模型的结果
按照 Sok 的建议,将 Dropout 置于 Dense 之前:val_acc = 0.65990

