Implementing a neural network: low accuracy

data-mining, neural-network
2022-03-02 13:37:42

I am training my neural network implementation on the MONK's problems dataset. I have read that with the hyperparameters I am using (Chapter 9) it should be possible to reach almost zero loss. These are my results on the training set after 90 epochs, and as you can see they are not good:

Training error after 0 iterations: 0.26427015361
Training error after 20 iterations: 0.231331617693
Training error after 40 iterations: 0.216396390383
Training error after 60 iterations: 0.198107908553
Training error after 80 iterations: 0.166181613583
TrErr: 0.155389641876

Here is the code for my neural network. I think the code is fairly simple and readable for an expert; let me know if anything needs clarification.

import numpy as np

# parameters
etas = [0.1]
eta = etas[0]  # the update rules below use a scalar eta
hidden_dim = 2
alpha = 0.5
reg_lambda = 0.0

input_dim = 17
output_dim = 1

# read and parse MONK data
def parse_data(text):
    with open(text) as f:
        content = f.readlines()    
    t = np.zeros((len(content), output_dim))
    X = np.zeros((len(content), input_dim))
    for i,val in enumerate(content):
        s = val.split(" ")
        t[i] = float(s[1])
        for idx in xrange(17):
            X[i][idx] = 0.0
        j = -1
        # |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10|11|12|13|14|15|16
        # |a1|a1|a1|a2|a2|a2|a3|a3|a4|a4|a4|a5|a5|a5|a5|a6|a6
        X[i][int(s[2])+j] = 1.0 # a1
        j = 2
        X[i][int(s[3])+j] = 1.0 # a2
        j = 5
        X[i][int(s[4])+j] = 1.0 # a3
        j = 7
        X[i][int(s[5])+j] = 1.0 # a4
        j = 10
        X[i][int(s[6])+j] = 1.0 # a5
        j = 14
        X[idx][int(s[7])+j] = 1.0 # a6

    return (X, t)

def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

def sigmoid_output_to_derivative(output):
    return output*(1-output)

# initialize weights
W0 = np.random.uniform(-0.7,0.7,size=(input_dim,hidden_dim))
W1 = np.random.uniform(-0.7,0.7,size=(hidden_dim,output_dim))
b0 = np.zeros((1, hidden_dim))
b1 = np.zeros((1, output_dim))
dW0 = 0
dW1 = 0

X, t = parse_data("data1.txt")  # hypothetical filename -- point this at your MONK training file

for j in xrange(90):
    # forward propagation
    l0 = X
    l1 = sigmoid(np.dot(l0,W0) + b0)
    l2 = sigmoid(np.dot(l1,W1) + b1)

    # back propagation
    l2_error = t - l2
    l2_delta = l2_error*sigmoid_output_to_derivative(l2) 

    l1_error = l2_delta.dot(W1.T)
    l1_delta = l1_error*sigmoid_output_to_derivative(l1)

    if (j% 20) == 0:
        print "Training error after", j, "iterations:",np.mean(np.square(l2_error))

    dW1 = eta*(l1.T.dot(l2_delta) + reg_lambda*W1) + alpha*dW1
    dW0 = eta*(l0.T.dot(l1_delta) + reg_lambda*W0) + alpha*dW0
    db1 = eta*(np.sum(l2_delta, axis=0))
    db0 = eta*(np.sum(l1_delta, axis=0))

    W1 += dW1
    W0 += dW0
    b1 += db1
    b0 += db0
2 Answers

It looks like you simply have a bug in your parse_data (see my comment: replace X[idx][int(s[7])+j] = 1.0 # a6 with X[i][int(s[7])+j] = 1.0 # a6). After the zeroing loop, idx is always 16, so a6 only ever gets set on row 16 and is missing from every other sample.

I also changed the parameters; you will see that they have a big influence. I suspect this problem gets stuck in local minima quite easily.

Can you run my code? (The only difference is that it is Python 3, but it should also run under Python 2.)

Here is a link to the data saved in data1.txt: http://mlearn.ics.uci.edu/databases/monks-problems/monks-2.test
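For completeness, a small sketch (not part of the original answer) that fetches the file with urllib, assuming the UCI mirror linked above is still reachable, and notes the line layout parse_data assumes:

import urllib.request

# download the MONK-2 file linked above and save it under the name the script expects
url = "http://mlearn.ics.uci.edu/databases/monks-problems/monks-2.test"
urllib.request.urlretrieve(url, "data1.txt")

# parse_data splits each line on single spaces and reads s[1] as the class label and
# s[2]..s[7] as the six attribute values, so it assumes lines of the form
# " <class> <a1> <a2> <a3> <a4> <a5> <a6> <id>" with a leading space.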

import numpy as np
import os
import matplotlib.pyplot as plt

abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)

# parameters
eta = 0.05
hidden_dim = 2
alpha = 0.8
reg_lambda = 0.0
epochs = 100

file = 'data1.txt'

input_dim = 17
output_dim = 1

# read and parse MONK data
def parse_data(text):
    with open(text) as f:
        content = f.readlines()
    t = np.zeros((len(content), output_dim))
    X = np.zeros((len(content), input_dim))
    for i,val in enumerate(content):
        s = val.split(" ")
        t[i] = float(s[1])
        # for idx in range(17):
        #   X[i][idx] = 0.0
        j = -1
        # |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10|11|12|13|14|15|16
        # |a1|a1|a1|a2|a2|a2|a3|a3|a4|a4|a4|a5|a5|a5|a5|a6|a6
        X[i][int(s[2])+j] = 1.0 # a1
        j = 2
        X[i][int(s[3])+j] = 1.0 # a2
        j = 5
        X[i][int(s[4])+j] = 1.0 # a3
        j = 7
        X[i][int(s[5])+j] = 1.0 # a4
        j = 10
        X[i][int(s[6])+j] = 1.0 # a5
        j = 14
        X[i][int(s[7])+j] = 1.0 # a6

    return (X, t)

X, t = parse_data(file)

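# sanity check: fraction of positive labels in the training data (class balance)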
print(np.sum(t)/len(t))

def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

def sigmoid_output_to_derivative(output):
    return output*(1-output)

# initialize weights
seed = 1
np.random.seed(seed)
W0 = np.random.normal(0, 1/input_dim, size = (input_dim,hidden_dim))
W1 = np.random.normal(0, 1/hidden_dim, size = (hidden_dim,output_dim))
b0 = np.zeros((1, hidden_dim))
b1 = np.zeros((1, output_dim))
dW0 = 0
dW1 = 0


error_list = []
for j in range(epochs):
    # forward propagation
    l0 = X
    l1 = sigmoid(np.dot(l0,W0) + b0)
    l2 = sigmoid(np.dot(l1,W1) + b1)

    # back propagation
    l2_error = l2 - t
    l2_delta = l2_error*sigmoid_output_to_derivative(l2)

    l1_error = l2_delta.dot(W1.T)
    l1_delta = l1_error*sigmoid_output_to_derivative(l1)

    error = np.mean(np.square(l2_error))
    error_list.append(error)
    if (j% 20) == 0:
        print("Training error after", j, "iterations:",error)

    dW1 = -eta * (l1.T.dot(l2_delta) + reg_lambda * W1) + alpha * dW1
    dW0 = -eta * (l0.T.dot(l1_delta) + reg_lambda * W0) + alpha * dW0
    db1 = -eta*(np.sum(l2_delta, axis=0))
    db0 = -eta*(np.sum(l1_delta, axis=0))

    W1 += dW1
    W0 += dW0
    b1 += db1
    b0 += db0

plt.plot(error_list)
plt.show()
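To see how much the random seed and the parameters matter (the local-minima remark above), a quick sweep like the following could be appended after the code above. train_once is a hypothetical helper that just repackages the training loop; it assumes X, t, sigmoid and sigmoid_output_to_derivative are already defined as above (reg_lambda is 0.0 above, so it is omitted here):

def train_once(X, t, eta, alpha, seed, epochs=100, hidden_dim=2):
    # same 17-2-1 network and update rule as above, wrapped in a function
    rng = np.random.RandomState(seed)
    W0 = rng.normal(0, 1/X.shape[1], size=(X.shape[1], hidden_dim))
    W1 = rng.normal(0, 1/hidden_dim, size=(hidden_dim, 1))
    b0 = np.zeros((1, hidden_dim))
    b1 = np.zeros((1, 1))
    dW0 = dW1 = 0
    for _ in range(epochs):
        l1 = sigmoid(X.dot(W0) + b0)
        l2 = sigmoid(l1.dot(W1) + b1)
        l2_delta = (l2 - t) * sigmoid_output_to_derivative(l2)
        l1_delta = l2_delta.dot(W1.T) * sigmoid_output_to_derivative(l1)
        dW1 = -eta * l1.T.dot(l2_delta) + alpha * dW1
        dW0 = -eta * X.T.dot(l1_delta) + alpha * dW0
        W1 += dW1
        W0 += dW0
        b1 += -eta * np.sum(l2_delta, axis=0)
        b0 += -eta * np.sum(l1_delta, axis=0)
    # final training MSE after the last update
    l2 = sigmoid(sigmoid(X.dot(W0) + b0).dot(W1) + b1)
    return np.mean(np.square(l2 - t))

for seed in range(5):
    for eta_try in (0.01, 0.05, 0.1):
        print(seed, eta_try, train_once(X, t, eta=eta_try, alpha=0.8, seed=seed))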

If I were you, I would import the sigmoid function from a Python library such as sklearn, since their implementation is probably more numerically stable than ours.
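For instance (a minimal sketch; scipy.special.expit is one such ready-made, numerically stable sigmoid -- using scipy here rather than sklearn is my substitution):

from scipy.special import expit
import numpy as np

def sigmoid(x):
    # expit(x) = 1 / (1 + exp(-x)), implemented so that extreme inputs do not
    # trigger the overflow warnings the hand-rolled np.exp version can produce
    return expit(x)

def sigmoid_output_to_derivative(output):
    return output * (1 - output)

x = np.array([-1000.0, 0.0, 1000.0])
print(sigmoid(x))  # [0.  0.5 1. ] -- handles the extremes without a warning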

One more thing: backpropagation computes the error as $t_i - o_i$, where $t_i$ is the correct label for sample $i$ and $o_i$ is the network's predicted output. You compute it as $o_i - t_i$, so change

l2_error = l2 - t

to

l2_error = t - l2
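For reference, with a squared-error loss $E = \tfrac{1}{2}(t_i - o_i)^2$ and a sigmoid output $o_i$, the output-layer delta rule is

$$\frac{\partial E}{\partial w_j} = -(t_i - o_i)\, o_i (1 - o_i)\, h_j, \qquad \Delta w_j = -\eta \frac{\partial E}{\partial w_j} = \eta\, (t_i - o_i)\, o_i (1 - o_i)\, h_j,$$

where $h_j$ is the activation feeding weight $w_j$. Writing the error as $o_i - t_i$ works too as long as the update is subtracted, which is what the negative $\eta$ in the code above does; the important thing is that the two signs stay consistent.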

I hope this helps.