Code
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('D:\\test.csv')
X = dataset.iloc[:, 0:2].values
y = dataset.iloc[:, 2].values
print (X)
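# Encoding categorical data: turn the two text columns into integer labels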
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_0 = LabelEncoder()
X[:, 0] = labelencoder_X_0.fit_transform(X[:, 0])
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
print (X)
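# One-hot encode both label-encoded columns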
onehotencoder = OneHotEncoder(categorical_features = [0,1])
X = onehotencoder.fit_transform(X).toarray()
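# X[:, 0:] keeps every column; X[:, 1:] then drops the first one-hot column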
X = X[:, 0:]
X = X[:, 1:]
print (X)
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
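# Feature scaling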
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print (X_test)
import keras
from keras.models import Sequential
from keras.layers import Dense
import keras.utils
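# Initialising the ANN and adding the hidden layers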
classifier = Sequential()
classifier.add(Dense(output_dim = 6, init = 'uniform', activation = 'relu', input_dim = 9))
classifier.add(Dense(output_dim = 6, init = 'uniform', activation = 'relu'))
# Adding the output layer
classifier.add(Dense(output_dim = 1, init = 'uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
classifier.fit(X_train, y_train, batch_size = 10, nb_epoch = 100)
_, accuracy = classifier.evaluate(X, y)
print('Accuracy: %.2f' % (accuracy*100))
# Predictions
dataset = pd.read_csv('D:\\test1.csv')
a = dataset.iloc[:, 0:2].values
labelencoder_a_0 = LabelEncoder()
a[:, 0] = labelencoder_a_0.fit_transform(a[:, 0])
labelencoder_a_1 = LabelEncoder()
a[:, 1] = labelencoder_a_1.fit_transform(a[:, 1])
onehotencoder = OneHotEncoder(categorical_features = [0,1])
a = onehotencoder.fit_transform(a).toarray()
a = a[:, 0:]
a = a[:, 1:]
predictions = classifier.predict_classes(a)
# summarize the first 2 cases
for i in range(2):
    print('%s => %d' % (a[i].tolist(), predictions[i]))
After the label encoder, the data looks like
[[0 0]
[0 1]
[0 2]
[0 3]
[0 4]
[0 5]
[0 6]
[0 7]
[0 8]]
After one-hot encoding it looks like
[[1. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 1.]]
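These 9 columns come about because the number of one-hot columns equals the number of distinct categories the encoder sees when it is fitted: the first label-encoded column holds a single value and the second holds nine, so fit_transform produces 1 + 9 = 10 columns, and the X = X[:, 1:] slice drops one of them. A minimal sketch of that behaviour on a hypothetical toy array (using the current OneHotEncoder interface instead of the older categorical_features argument):

import numpy as np
from sklearn.preprocessing import OneHotEncoder

toy = np.array([[0, i] for i in range(9)])   # column 0: 1 category, column 1: 9 categories
enc = OneHotEncoder()
encoded = enc.fit_transform(toy).toarray()
print(encoded.shape)                         # (9, 10): 1 + 9 one-hot columns
print(encoded[:, 1:].shape)                  # (9, 9) after dropping the first column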
After [X_test = sc.transform(X_test)] it looks like
[[ 0. 1. -0.40824829 -0.40824829 -0.40824829 -0.40824829
-0.40824829 -0.40824829 -0.40824829]
[ 1. 0. -0.40824829 -0.40824829 -0.40824829 -0.40824829
-0.40824829 -0.40824829 -0.40824829]]
X_train looks like
[[-0.40824829 -0.40824829 -0.40824829 -0.40824829 0. 2.44948974
0. -0.40824829 -0.40824829]
[-0.40824829 -0.40824829 -0.40824829 -0.40824829 0. -0.40824829
0. -0.40824829 2.44948974]
[-0.40824829 -0.40824829 -0.40824829 -0.40824829 0. -0.40824829
0. 2.44948974 -0.40824829]
[ 2.44948974 -0.40824829 -0.40824829 -0.40824829 0. -0.40824829
0. -0.40824829 -0.40824829]
[-0.40824829 2.44948974 -0.40824829 -0.40824829 0. -0.40824829
0. -0.40824829 -0.40824829]
[-0.40824829 -0.40824829 -0.40824829 2.44948974 0. -0.40824829
0. -0.40824829 -0.40824829]
[-0.40824829 -0.40824829 2.44948974 -0.40824829 0. -0.40824829
0. -0.40824829 -0.40824829]]
It trains successfully, but when I run the prediction code it throws this error:
Error when checking input: expected dense_205_input to have shape (9,) but got array with shape (6,)
The variable (a) after one_hot looks like
[[1. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0.]
[0. 0. 1. 0. 0. 0.]
[0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 1.]]
I don't understand what is wrong with my prediction samples.
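My best guess at the cause: the prediction code fits brand new LabelEncoder and OneHotEncoder objects on test1.csv, and that file only contains 6 distinct categories in its second column, so the resulting matrix has 6 columns instead of the 9 the network expects. A sketch of what I think the prediction step would have to look like, assuming the labelencoder_X_0, labelencoder_X_1, onehotencoder and sc fitted on test.csv are still in scope (not re-created) and test1.csv contains no categories that were absent from test.csv:

# Transform test1.csv with the objects already fitted on test.csv (no re-fitting)
dataset = pd.read_csv('D:\\test1.csv')
a = dataset.iloc[:, 0:2].values
a[:, 0] = labelencoder_X_0.transform(a[:, 0])   # reuse the encoders fitted on the training data
a[:, 1] = labelencoder_X_1.transform(a[:, 1])
a = onehotencoder.transform(a).toarray()        # same 10 columns as for X
a = a[:, 1:]                                    # same slice as for X -> 9 columns
a = sc.transform(a)                             # apply the same scaling used for X_train
predictions = classifier.predict_classes(a)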