我有一个数据集。
我无法使用 K 折验证。我收到了错误:
ValueError("不支持 {0}".format(y_type))
ValueError: 不支持连续。
我不想对 int 进行编码,因为它可能会影响数据,而且我想了解为什么 K-fold 不起作用。
下面是我的python代码。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn import cross_validation, metrics
from sklearn.cross_validation import train_test_split
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn import preprocessing
- `List item`
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx"
names=['Relative Compactness','Surface Area','Wall Area','Roof Area','Overall Height','Orientation','Glazing Area','Glazing Area Distribution','Heating Load','Cooling Load']
df = pd.read_excel(url,names=names)
#Feature selection
train=df.sample(frac=0.8,random_state=150)
test=df.drop(train.index)
#save the original values in a dataframe so we can compare later
test_loads=test[["Cooling Load"]]
#Create 2 lists of response values to train our model
Y1=np.array(train['Heating Load'])
Y2=np.array(train['Cooling Load'])
#Select the features
train_corr=train[['Overall Height','Relative Compactness','Roof Area','Surface Area']]
test_corr=test[['Overall Height','Relative Compactness','Roof Area','Surface Area']]
seed = 7
scoring = 'accuracy'
X_train,X_test,y_train,y_test=cross_validation.train_test_split(train_corr,Y1,test_size=0.2)
kfold = model_selection.KFold(n_splits=10, random_state=seed)
cv_results = model_selection.cross_val_score(RandomForestRegressor(), X_train, y_train, cv=kfold, scoring=scoring)
print (cv_results.mean())