我认为这里的关键在于：训练时，让所有权重都相同（隐藏层中的各个神经元也因此完全相同）并不能得到最优解；只有权重彼此不同，才能使实际值与预测值之间的差异更小。
我编写了下面这个非常简陋的神经网络（作为学习练习），也许会对你有所帮助。
import numpy as np
class NeuralNetwork(object):
    """Toy network with one hidden layer, "trained" by random weight search.

    The network computes ``sigmoid(sigmoid(x . w1) . w2)``. There is no
    gradient descent: ``dump_train`` repeatedly draws random weights and
    keeps the draw with the smallest squared error on the training data.

    Inputs and targets are divided by their respective training maxima.
    The divisors are kept as ``x_scale`` and ``y_scale`` so that queries
    passed to ``forward`` are scaled exactly like the training inputs.
    """

    def __init__(self, X, Y, hidden_layer_dim):
        """Store the scaled training set and the hidden layer width.

        X is a 2d array (one row per sample), Y holds one target per row of
        X, and hidden_layer_dim is the number of hidden neurons.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        # Remember the divisors: forward() must reuse the *training* scale,
        # otherwise a sample's prediction would depend on which other
        # samples happen to share its query batch.
        self.x_scale = np.max(X)
        self.y_scale = np.max(Y)
        self.X = X / self.x_scale
        self.Y = Y / self.y_scale  # Used for training
        self.hidden_layer_dim = hidden_layer_dim

    def _draw_weights(self):
        """Return a fresh ``(w1, w2)`` pair sampled from N(0, 1)."""
        # w1 is drawn before w2 so a seeded run consumes the global NumPy
        # random stream in a fixed order.
        w1 = np.random.normal(0, 1, (self.X.shape[1], self.hidden_layer_dim))
        w2 = np.random.normal(0, 1, self.hidden_layer_dim)
        return w1, w2

    @staticmethod
    def _feed_forward(x, w1, w2):
        """Run already-scaled inputs ``x`` through the net with the given weights."""
        hidden = sigmoid(np.dot(x, w1))
        return sigmoid(np.dot(hidden, w2))

    def initialize_weights(self):
        """Set ``w1`` and ``w2`` to random starting values."""
        self.w1, self.w2 = self._draw_weights()

    def forward(self, xi):
        """Predict for the samples in ``xi`` using the current weights.

        xi is a 2d array (or nested list) with one row per sample, given in
        the same units as the training inputs. The result has one value per
        row, each in the interval (0, 1); it is expressed in the scaled
        target units, so multiply by ``y_scale`` to get back to the units
        of the original Y.
        """
        # Scale by the training maximum, not by the maximum of this batch.
        xi = np.asarray(xi) / self.x_scale
        return self._feed_forward(xi, self.w1, self.w2)

    def dump_train(self, n_iterations):
        """Random search: try ``n_iterations`` weight draws and keep the best.

        Every improvement is printed and stored in ``w1`` / ``w2``. With
        ``n_iterations == 0`` the existing weights are left untouched.
        """
        min_mse = np.inf
        for i in range(n_iterations):
            w1, w2 = self._draw_weights()
            diff = self.Y - self._feed_forward(self.X, w1, w2)
            # Sum of squared errors; it is not divided by the sample count,
            # which does not change which draw ranks best.
            mse = np.dot(diff, diff)
            if mse < min_mse:
                min_mse = mse
                print("min_mse: {}, iteration: {}".format(mse, i))
                self.w1 = w1
                self.w2 = w2
def sigmoid(a):
    """Logistic function ``1 / (1 + e**(-a))``.

    Works on a scalar or, element-wise, on a NumPy array.
    """
    decay = np.e ** (-a)
    return 1 / (1 + decay)
if __name__ == "__main__":
    # Demo: fit the toy network on seven samples by random search, then
    # predict for three query points and show the weights that were kept.
    # The builtin ``float`` is used as dtype because the ``np.float`` alias
    # no longer exists in current NumPy releases; both mean float64.
    my_x = np.array(
        [[8, 5], [7, 5], [8, 4], [8, 1], [4, 3], [5, 2], [4, 2]],
        dtype=float,
    )
    my_y = np.array([100, 90, 88, 60, 50, 45, 40], dtype=float)

    NN = NeuralNetwork(my_x, my_y, hidden_layer_dim=3)
    NN.initialize_weights()
    NN.dump_train(100000)

    new_x = [[8, 4], [7, 1], [3, 3]]
    y_hat = NN.forward(new_x)
    print("prediction: {}".format(y_hat))
    print("weight 1: {}".format(NN.w1))
    print("weight 2: {}".format(NN.w2))
结果:
weight 1: [[-0.13787113 -1.30913914 0.64624687]
[-1.76733779 0.77449265 1.61122177]]
weight 2: [-1.42489674 -1.94360005 2.56365303]
权重不同。