我是编程和机器学习的初学者,我的任务是学习逻辑回归的底层原理(所以我拼凑了下面的 Python 代码),但现在我被要求弄清楚如何在这段代码中添加偏置项(bias)。我完全卡在了应该在哪里添加它这一点上——我认为应该是在定义假设函数的地方,但如果有人能指出解决这个问题的正确方向,我将非常感激。
如果有帮助,该逻辑回归将用于使用威斯康星州乳腺癌数据集 ( https://www.kaggle.com/uciml/breast-cancer-wisconsin-data )对肿瘤是良性还是恶性进行分类
# --- Data preparation ---
# NOTE(review): the original ran train_test_split BEFORE X and Y existed,
# assigned X twice (the first assignment was dead code), and leaked the
# target column 'diagnosis' into the feature matrix. Fixed ordering below.
X = data[['texture_mean', 'perimeter_mean', 'smoothness_mean',
          'compactness_mean', 'symmetry_mean']]  # features only — no target leakage
X = min_max_scaler.fit_transform(np.array(X))

# Target labels as floats; presumably 'diagnosis' was already encoded to
# 0/1 upstream (e.g. B->0, M->1) — TODO confirm against the loading code.
Y = data["diagnosis"].map(lambda x: float(x))
Y = np.array(Y)

# Split only after X and Y are defined; 30% held out for testing.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
def Sigmoid(z):
    """Numerically stable logistic function 1 / (1 + e^-z).

    For negative z the algebraically equivalent form 1 - 1/(1 + e^z)
    is used so that math.exp never receives a large positive argument
    and therefore cannot overflow.
    """
    if z >= 0:
        return 1.0 / (1.0 + math.exp(-z))
    return 1.0 - 1.0 / (1.0 + math.exp(z))
def Hypothesis(theta, x, bias=0.0):
    """Predicted probability h_theta(x) = sigmoid(theta . x + bias).

    Args:
        theta: iterable of weights, one per feature.
        x: iterable of feature values, same length as theta.
        bias: optional intercept (bias) term added to the linear
            combination before the sigmoid. Defaults to 0.0, so all
            existing callers behave exactly as before. (The common
            alternative is to prepend a constant-1 column to X and keep
            an extra weight in theta.)

    Returns:
        A float in (0, 1).
    """
    # NOTE(review): the original return line ended with the pasted
    # artifact "enter preformatted text here", which is a syntax error;
    # it has been removed.
    z = sum(w * xi for w, xi in zip(theta, x))
    return Sigmoid(z + bias)
def Cost_Function(X, Y, theta, m):
    """Average cross-entropy (negative log-likelihood) over the first m rows.

    Args:
        X: feature rows (indexable, each row indexable by feature).
        Y: labels, expected to be 0 or 1 (possibly as floats).
        theta: current weight vector.
        m: number of samples to average over.

    Returns:
        The scalar cost J. Also prints it, as the original did, so
        training progress stays visible.
    """
    # NOTE(review): the original unconditionally pre-computed the Y==1
    # error term before the if/elif — dead work, removed here.
    sum_of_errors = 0.0
    for i in range(m):
        hi = Hypothesis(theta, X[i])
        # Guard against log(0) exactly as the original did: a term whose
        # argument would be <= 0 contributes log(1) == 0.
        if Y[i] == 1:
            sum_of_errors += Y[i] * math.log(hi if hi > 0 else 1)
        elif Y[i] == 0:
            sum_of_errors += (1 - Y[i]) * math.log(1 - hi if 1 - hi > 0 else 1)
    J = (-1.0 / m) * sum_of_errors
    print('cost is: ', J)
    return J
def Cost_Function_Derivative(X, Y, theta, j, m, alpha):
    """Scaled partial derivative of the cost w.r.t. weight j.

    Returns (alpha / m) * sum_i (h(x_i) - y_i) * x_ij — i.e. the learning
    rate is folded in here, which is why Gradient_Descent subtracts this
    value from theta[j] directly.

    NOTE(review): the original rebound m = len(Y) after the loop, so the
    averaging denominator could disagree with the m actually used by the
    summation. The same m is now used consistently throughout.
    """
    sum_errors = 0.0
    for i in range(m):
        hi = Hypothesis(theta, X[i])
        sum_errors += (hi - Y[i]) * X[i][j]
    return (float(alpha) / float(m)) * sum_errors
def Gradient_Descent(X, Y, theta, m, alpha):
    """One simultaneous gradient-descent update of every weight.

    Cost_Function_Derivative already returns the step (alpha/m)*gradient,
    so each new weight is a plain subtraction.

    Returns:
        A new list of weights; the input theta is not mutated.
    """
    # NOTE(review): removed the unused local `constant = alpha/m` — that
    # scaling is applied inside Cost_Function_Derivative.
    return [theta[j] - Cost_Function_Derivative(X, Y, theta, j, m, alpha)
            for j in range(len(theta))]
# Training hyperparameters.
initial_theta = [0,1]  # NOTE(review): only 2 weights, but X above has 5 feature columns — the lengths should match; verify
alpha = 0.01  # learning rate
iterations = 1000  # number of gradient-descent passes
# NOTE(review): Logistic_Regression is not defined anywhere in this snippet —
# presumably it drives Gradient_Descent/Cost_Function in a loop; confirm it
# exists before running.
Logistic_Regression(X,Y,alpha,initial_theta,iterations)