I'm trying to build a neural network (3 layers, 1 hidden) in Python on the classic Titanic dataset. Following Siraj's example and the 3Blue1Brown tutorial, I want to include a bias term that gets updated through backpropagation, but I know my dimensions are wrong. (I suspect I'm updating the bias incorrectly, which is what produces the mismatched dimensions.)
The while loop in the code below works for the training dataset, where the node products and the biases have the same dimensions, but as soon as I pass a test example to the predict function, the dimensions no longer match and I get an error. I've commented my code with the dimensions computed for the dot products between nodes and inputs.
Can someone help me understand what the dimensions of the bias term should be, both in this particular case and in general, and how it should be added (row-wise or column-wise)?
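For context, my current understanding of NumPy broadcasting is that a 1xH bias row would broadcast over a batch of any size, while my MxH bias is tied to the training batch size. A minimal sketch of what I mean (the sizes are illustrative, not taken from my actual data):

import numpy as np

M, m, H = 891, 418, 32           # train rows, test rows, hidden units (illustrative)
act_train = np.zeros((M, H))     # activations for the training batch
act_test = np.zeros((m, H))      # activations for a test batch

b_row = np.zeros((1, H))         # one bias per hidden unit
_ = act_train + b_row            # OK: 1xH broadcasts over M rows
_ = act_test + b_row             # OK: and over m rows too

b_tied = np.zeros((M, H))        # bias tied to the training batch size (what I have)
_ = act_train + b_tied           # OK only because the shapes match exactly
# act_test + b_tied              # ValueError: shapes (m,H) and (M,H) don't broadcast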
Code:
import numpy as np

def sigmoid(x, deriv=False):
    """
    Sigmoid activation function. With deriv=True, returns the derivative,
    assuming x is already a sigmoid output.
    """
    if deriv:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))
# learning rate, hidden layer dimension, error threshold, dropout rate
alpha, hidden_size, threshold, drop_rate = (0.035,32,0.1,0.5)
# x_train and y_train are the training dataset and corresponding classes
# syn0 and syn1 are the synapses, weight matrices between layers (3 layers, 2 synapses)
syn0 = 2*np.random.random((x_train.shape[1],hidden_size)) - 1 # NxH
syn1 = 2*np.random.random((hidden_size,1)) - 1 # Hx1
b1 = np.random.random((x_train.shape[0],hidden_size)) # MxH
b2 = np.random.random((x_train.shape[0],1)) # Mx1
layer_2_error = 100*np.abs(np.random.random((y_train.shape[0],1))) - 1 # Mx1
avg_err = []
count = 0
while np.mean(np.abs(layer_2_error)) > threshold:
    # Forward
    layer_0 = x_train  # training dataset
    A = np.dot(layer_0, syn0) + b1  # MxN X NxH + MxH ~ MxH
    layer_1 = sigmoid(A)
    # dropout to reduce overfitting
    layer_1 *= np.random.binomial([np.ones((len(x_train), hidden_size))], 1 - drop_rate)[0] * (1 / (1 - drop_rate))
    B = np.dot(layer_1, syn1) + b2  # MxH X Hx1 + Mx1 ~ Mx1
    layer_2 = sigmoid(B)
    # Backprop
    layer_2_error = layer_2 - y_train  # Mx1
    layer_2_delta = layer_2_error * sigmoid(layer_2, deriv=True)  # Mx1 * Mx1 ~ Mx1
    layer_1_error = np.dot(layer_2_delta, syn1.T)  # Mx1 X 1xH ~ MxH
    layer_1_delta = layer_1_error * sigmoid(layer_1, deriv=True)  # MxH * MxH ~ MxH
    # update weights
    syn1 -= alpha * np.dot(layer_1.T, layer_2_delta)  # HxM X Mx1 ~ Hx1
    syn0 -= alpha * np.dot(layer_0.T, layer_1_delta)  # NxM X MxH ~ NxH
    # update biases
    b2 -= alpha * layer_2_delta  # Mx1
    b1 -= alpha * layer_1_delta  # MxH
    avg_err.append(np.mean(np.abs(layer_2_error)))
    if count % 500 == 0:
        print("Error after", count, "iterations:", np.mean(np.abs(layer_2_error)))
    count += 1
def predict(x, w0, w1, b1, b2):
    """
    Predict an output for data x, given weight matrices w0 & w1 and biases b1 & b2
    """
    A = np.dot(x, w0) + b1  # mxN X NxH (+ MxH) ~ mxH
    layer_1 = sigmoid(A)
    B = np.dot(layer_1, w1) + b2  # mxH X Hx1 (+ Mx1) ~ mx1 (preds)
    layer_2 = B
    return (sigmoid(layer_2) > 0.5).astype(int)
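For completeness, this is how the failure shows up when I call predict on the test set (assuming x_test has a different number of rows than x_train):

preds = predict(x_test, syn0, syn1, b1, b2)
# ValueError: operands could not be broadcast together, e.g. shapes (418,32) (891,32),
# because b1 and b2 were created with x_train.shape[0] rows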
