Loss does not decrease when training a neural network for the XOR operation in Torch

data-mining neural-network torch
2022-03-09 12:17:12

I implemented a simple feedforward neural network with one hidden layer in Torch to learn the XOR operation. Here is my code:

require 'torch'
require 'nn'

m = nn.Sequential()
m:add(nn.Linear(2,2)) -- input -> hidden
m:add(nn.Linear(2,1)) -- hidden -> output
m:add(nn.Sigmoid())

torch.manualSeed(1)

-- note: nn.Linear stores its parameters in 'weight' and 'bias';
-- assigning to 'weights' creates an unused field, so these two
-- lines do not actually re-initialize the layers
m.modules[1].weights = torch.rand(2,2)
m.modules[2].weights = torch.rand(2,1)

criterion = nn.BCECriterion()

-- the four XOR input patterns and their targets
inputs  = torch.Tensor({{0,0},{0,1},{1,0},{1,1}})
targets = torch.Tensor({{0},{1},{1},{0}})

function trainEpoch(m,criterion,inputs,targets)
    for i=1,inputs:size(1) do
        local input = inputs[i]
        local target = targets[i]
        -- forward pass
        local output = m:forward(input)
        local loss = criterion:forward(output,target)
        print(loss)
        -- backward pass
        local gradOutput = criterion:backward(output,target)
        m:zeroGradParameters()
        local gradInput = m:backward(input,gradOutput)
        -- update
        --m:updateGradParameters(0.9) -- momentum (requires the dpnn package)
        m:updateParameters(0.01) -- W = W - 0.01*dL/dW
    end
end

for i=1,10000 do -- 10000 passes over the training set
    trainEpoch(m,criterion,inputs,targets)
end

-- prediction: run the trained network on all four patterns
testinput = torch.Tensor({{0,0},{0,1},{1,0},{1,1}})

for i=1,testinput:size(1) do
    local output = m:forward(testinput[i])
    print(output)
end

When I run the code above, the loss does not decrease (it stays almost the same across all iterations), so the network cannot predict the correct outputs. Can anyone help me find the mistake I am making here?

I have also tried different manual seed values and different weight initializations, but the loss still stays the same across all iterations.

1 Answer

I finally found the errors in my network:

1. I did not add a non-linearity after the first linear layer (see the sketch below).
2. I did not sample the training examples randomly when running stochastic gradient descent.
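
To see why the first point matters: without a non-linearity in between, two stacked Linear layers compose into a single affine map, and XOR is not linearly separable, so no choice of weights in the original network could fit it. Here is a minimal sketch of that collapse (the names lin1, lin2, stacked, and combined are mine, for illustration only):

require 'torch'
require 'nn'

lin1 = nn.Linear(2,2)
lin2 = nn.Linear(2,1)
stacked = nn.Sequential():add(lin1):add(lin2)

-- a single Linear layer with the composed parameters:
-- W2*(W1*x + b1) + b2 = (W2*W1)*x + (W2*b1 + b2)
combined = nn.Linear(2,1)
combined.weight:copy(lin2.weight * lin1.weight)
combined.bias:copy(lin2.weight * lin1.bias + lin2.bias)

x = torch.Tensor({1,0})
print(stacked:forward(x))   -- both print the same value, so the
print(combined:forward(x))  -- two-layer net is no more expressive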

After fixing these two things, it now works correctly. Updated code:

require 'torch'
require 'nn'

m = nn.Sequential()
m:add(nn.Linear(2,2)) -- input -> hidden
m:add(nn.Tanh())      -- non-linearity between the two linear layers (fix #1)
m:add(nn.Linear(2,1)) -- hidden -> output
m:add(nn.Sigmoid())

criterion = nn.BCECriterion()

-- the four XOR input patterns and their targets
inputs  = torch.Tensor({{0,0},{0,1},{1,0},{1,1}})
targets = torch.Tensor({{0},{1},{1},{0}})

function trainEpoch(m,criterion,inputs,targets)
    for i=1,inputs:size(1) do
        -- pick a random training example (fix #2): sampling with replacement
        local idx = math.random(inputs:size(1))
        local input = inputs[idx]
        local target = targets[idx]
        -- forward pass
        local output = m:forward(input)
        local loss = criterion:forward(output,target)
        print(loss)
        -- backward pass
        local gradOutput = criterion:backward(output,target)
        m:zeroGradParameters()
        local gradInput = m:backward(input,gradOutput)
        -- update
        --m:updateGradParameters(0.9) -- momentum (requires the dpnn package)
        m:updateParameters(0.01) -- W = W - 0.01*dL/dW
    end
end

for i=1,10000 do -- 10000 passes over the training set
    trainEpoch(m,criterion,inputs,targets)
end

-- prediction: run the trained network on all four patterns
testinput = torch.Tensor({{0,0},{0,1},{1,0},{1,1}})

for i=1,testinput:size(1) do
    local output = m:forward(testinput[i])
    print(output)
end
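
A note on the second fix: math.random samples with replacement, so within one epoch some patterns can be visited several times and others not at all. A variation I would suggest (my own sketch, not part of the original answer) is to shuffle the indices with torch.randperm so that every example is seen exactly once per epoch:

function trainEpoch(m,criterion,inputs,targets)
    -- a random permutation of 1..N, so each example is used once per epoch
    local perm = torch.randperm(inputs:size(1))
    for i=1,inputs:size(1) do
        local input = inputs[perm[i]]
        local target = targets[perm[i]]
        local output = m:forward(input)
        local loss = criterion:forward(output,target)
        local gradOutput = criterion:backward(output,target)
        m:zeroGradParameters()
        m:backward(input,gradOutput)
        m:updateParameters(0.01)
    end
end

The nn package also ships a ready-made trainer, nn.StochasticGradient, which wraps this kind of loop and, to the best of my recollection, shuffles the example order by default via its shuffleIndices option.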