我为 KNN 构建了自定义转换器,但我不明白为什么我的 k 数在设置时总是显示相同的精度......
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
def minkowski_dist(a, b):
return distance.minkowski(a, b)
class MyKNN(BaseEstimator, TransformerMixin):
def __init__(self, k, scale_data=False):
super().__init__()
self.k = k
if scale_data is True:
self.ss = StandardScaler()
else:
self.ss = None
self.X_train = None
def fit(self, X_train, Y_train):
self.X_train = X_train
self.Y_train = Y_train
def predict(self, X_test):
prediction = []
for row in X_test:
label = self.closest_n(row)
prediction.append(label)
return prediction
def closest_n(self, row):
best_dist = minkowski_dist(row, self.X_train[0])
best_index = 0
for i in range(1, len(self.X_train)):
dist = minkowski_dist(row, self.X_train[i])
if dist < best_dist:
best_dist = dist
best_index = i
return self.Y_train[best_index]
一直在iris 数据集上做 KNN
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris_dataset=load_iris()
X=iris_dataset.data
Y=iris_dataset.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=42, stratify=Y)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
这就是我得到的:
for k in range(1,21,2):
knn = MyKNN(k)
knn.fit(X_train, Y_train)
prediction = knn.predict(X_test)
from sklearn.metrics import accuracy_score
print("Test accuracy for k={}:".format(k),accuracy_score(Y_test, prediction))