import numpy as np
from sklearn import preprocessing, cross_validation, neighbors
import pandas as pd
from sklearn.linear_model import LinearRegression
df = pd.read_csv('Downloads/breast-cancer-wisconsin.data.txt',skiprows=1)
df.replace('?', -99999, inplace=True)
df.drop('id', 1, inplace=True )
X= np.array(df.drop(['class'],1))
y= np.array(df['class'])
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2)
#clf = neighbors.KNeighborsClassifier()
clf = LinearRegression(normalize=True)
clf.fit(X_train, y_train)
accuracy= clf.score(X_test, y_test)
print(accuracy)
example_measures = np.array([[4,2,1,1,1,2,3,2,1],[4,2,1,2,2,2,3,2,1]])
example_measures = example_measures.reshape(1,-1)
prediction = clf.predict(example_measures) ##(example_measures)
print(prediction)
当我在 Ubuntu 或 Anaconda 上运行上述命令行时出现问题:
ValueError:查询数据维度必须匹配训练数据维度
如何解决这个问题?我确信通过隔离单个命令行的方法 - 并发现它在以下位置出现错误:
prediction = clf.predict(example_measures)
我尝试使用:
prediction = clf.predict(X_test).
没关系。我真的很想预测我创建的示例。如何更改代码?