我在 Torch 中实现了一个简单的 1 隐藏层前馈神经网络来学习 X-OR 操作。下面是我的代码:
require 'torch'
require 'nn'
m = nn.Sequential()
m:add(nn.Linear(2,2))
m:add(nn.Linear(2,1))
m:add(nn.Sigmoid())
torch.manualSeed(1)
m.modules[1].weights = torch.rand(2,2)
m.modules[2].weights = torch.rand(2,1)
--print(parax_m)
criterion = nn.BCECriterion()
inputs = torch.Tensor(4,2)
inputs[1][1] = 0
inputs[1][2] = 0
inputs[2][1] = 0
inputs[2][2] = 1
inputs[3][1] = 1
inputs[3][2] = 0
inputs[4][1] = 1
inputs[4][2] = 1
targets = torch.Tensor(4,1)
targets[1][1] = 0
targets[2][1] = 1
targets[3][1] = 1
targets[4][1] = 0
function trainEpoch(m,criterion,inputs,targets)
for i=1,inputs:size(1) do
local input = inputs[i]
local target = targets[i]
local output = m:forward(input)
--print(output)
local loss = criterion:forward(output,target)
print(loss)
-- backward
local gradOutput = criterion:backward(output,target)
m:zeroGradParameters()
local gradInput = m:backward(input,gradOutput)
--update
--module:updateGradParameters(0.9) -- momentum (require dpnn)
m:updateParameters(0.01) -- W = W -0.1*dL/dW
end
end
for i=1,10000 do
trainEpoch(m,criterion,inputs,targets)
end
-- prediciton
testinput = torch.Tensor(4,2)
testinput[1][1] = 0
testinput[1][2] = 0
testinput[2][1] = 0
testinput[2][2] = 1
testinput[3][1] = 1
testinput[3][2] = 0
testinput[4][1] = 1
testinput[4][2] = 1
for i=1,testinput:size(1) do
local output = m:forward(testinput[i])
print(output)
end
当我运行上面的代码时,损失没有衰减(在所有迭代中几乎相同),因此它不能预测正确的输出。谁能帮我找出我在这里做错的错误?
我还尝试了不同的手动种子值,不同的权重初始化,但在所有迭代中损失仍然保持不变。