我使用Iris 数据集的 第一列和第三列作为特征。以及Iris Setosa (-1)和Iris Versicolor (1)的标签。我使用ADALINE作为我的数据集的简单分类模型。我使用梯度下降作为成本最小化函数。但在每次迭代中,误差都会增加。我在 python 代码中做错了什么?
import numpy as np
import pandas as pd
class AdalineGD(object):
def __init__(self, eta = 0.01, n_iter = 50):
self.eta = eta
self.n_iter = n_iter
def fit (self, X, y):
"""Fit training data."""
self.w_ = np.random.random(X.shape[1])
self.cost_ = []
print ('Initial weights are: %r' %self.w_)
for i in range(self.n_iter):
output = self.net_input(X)
print ("On iteration %d, output is: %r" %(i, output))
errors = output - y
print("On iteration %d, Error is: %r" %(i, errors))
self.w_ += self.eta * X.T.dot(errors)
print ('Weights on iteration %d: %r' %(i, self.w_))
cost = (errors**2).sum() / 2.0
self.cost_.append(cost)
print ("On iteration %d, Cost is: %r" %(i, cost))
prediction = self.predict(X)
print ("Prediction after iteration %d is: %r" %(i, prediction))
input()
return self
def net_input(self, X):
"""Calculate net input"""
return X.dot(self.w_)
def activation(self, X):
"""Computer Linear Activation"""
return self.net_input(X)
def predict(self, X):
"""Return class label after unit step"""
return np.where(self.activation(X) >= 0.0, 1, -1)
####### END OF THE CLASS ########
#importing the Iris Dataset
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", header = None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values
#Adding the ones column to the X matrix
X = np.insert(X, 0, np.ones(X.shape[0]), axis = 1)
ada = AdalineGD(n_iter = 20, eta = 0.001).fit(X, y)