我正在为 Keras 中的分类问题构建一个模型,为此我使用了 KerasClassifier,即包装器 scikit-learn。下面是相同的代码。
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,roc_auc_score
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.grid_search import GridSearchCV
# In[3]:
def cleanPeople(people):
people = people.drop(['date'],axis=1)
people['people_id'] = people['people_id'].apply(lambda x : x.split('_')[1])
people['people_id'] = pd.to_numeric(people['people_id']).astype(int)
fields = list(people.columns)
cat_data = fields[1:11]
bool_data = fields[11:]
for data in cat_data:
people[data] = people[data].fillna('type 0')
people[data] = people[data].apply(lambda x: x.split(' ')[1])
people[data] = pd.to_numeric(people[data]).astype(int)
for data in bool_data:
people[data] = pd.to_numeric(people[data]).astype(int)
return people
# In[4]:
def cleanAct(data, train=False):
data = data.drop(['date'],axis = 1)
if train:
data = data.drop(['outcome'],axis=1)
data['people_id'] = data['people_id'].apply(lambda x : x.split('_')[1])
data['people_id'] = pd.to_numeric(data['people_id']).astype(int)
data['activity_id'] = data['activity_id'].apply(lambda x: x.split('_')[1])
data['activity_id'] = pd.to_numeric(data['activity_id']).astype(int)
fields = list(data.columns)
cat_data = fields[2:13]
for column in cat_data:
data[column] = data[column].fillna('type 0')
data[column] = data[column].apply(lambda x : x.split(' ')[1])
data[column] = pd.to_numeric(data[column]).astype(int)
return data
# In[5]:
people = pd.read_csv("people.csv")
people = cleanPeople(people)
act_train = pd.read_csv("act_train.csv")
act_train_cleaned = cleanAct(act_train,train=True)
act_test = pd.read_csv("act_test.csv")
act_test_cleaned = cleanAct(act_test)
# In[6]:
train = act_train_cleaned.merge(people,on='people_id', how='left')
test = act_test_cleaned.merge(people, on='people_id', how='left')
# In[8]:
output = act_train['outcome']
X_train, X_test, y_train, y_test = train_test_split(train,output, test_size=0.2, random_state =7)
input_len = len(X_train)
print(input_len)
# In[9]:
def base_model(optimizer='rmsprop', init='normal', dropout_rate =0.0):
model = Sequential()
model.add(Dense(100, input_dim = input_len, activation='relu', init=init))
model.add(Dropout(dropout_rate))
model.add(Dense(50, activation = 'relu', init = init))
model.add(Dropout(dropout_rate))
model.add(Dense(10, activation = 'relu', init = init))
model.add(Dropout(dropout_rate))
model.add(Dense(1, activation = 'sigmoid', init = init))
model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics =['accuracy'])
return model
# In[10]:
seed = 7
np.random.seed(seed)
model = KerasClassifier(build_fn = base_model)
# In[12]:
#grid_parameters
optimizers = ['rmsprop', 'adam']
init = ['normal', 'uniform']
dropout_rate = [0.0, 0.2, 0.5]
epochs = [100, 150, 200]
batches = [10,20,30]
param_grid = dict(optimizer = optimizers, init=init, dropout_rate = dropout_rate, nb_epoch = epochs, batch_size=batches)
# In[ ]:
validator = GridSearchCV(estimator=model, param_grid= param_grid)
validator.fit(X_train, y_train)
print(validator.best_score_)
print(validator.best_params_)
以下代码在我的工作站上运行时引发了此错误。
Traceback (most recent call last):
File "../src/script.py", line 137, in
model.fit(X_train, y_train)
File "/opt/conda/lib/python3.5/site-packages/Keras-1.0.6-py3.5.egg/keras/wrappers/scikit_learn.py", line 148, in fit
history = self.model.fit(X, y, **fit_args)
File "/opt/conda/lib/python3.5/site-packages/Keras-1.0.6-py3.5.egg/keras/models.py", line 429, in fit
sample_weight=sample_weight)
File "/opt/conda/lib/python3.5/site-packages/Keras-1.0.6-py3.5.egg/keras/engine/training.py", line 1036, in fit
batch_size=batch_size)
File "/opt/conda/lib/python3.5/site-packages/Keras-1.0.6-py3.5.egg/keras/engine/training.py", line 963, in _standardize_user_data
exception_prefix='model input')
File "/opt/conda/lib/python3.5/site-packages/Keras-1.0.6-py3.5.egg/keras/engine/training.py", line 108, in standardize_input_data
str(array.shape))
Exception: Error when checking model input: expected dense_input_1 to have shape (None, 1757832) but got array with shape (1757832, 52)
当我在 scikit-learn 中训练模型时,没有出现这样的错误。请帮忙!