数据挖掘 - 为什么我的梯度下降损失会增加？ - 吾爱随笔录

当学习率为 0.01 时，损失似乎在减少，而当我稍微提高学习率时，损失会增加。为什么会这样？梯度计算错误吗？

具有 2 个隐藏层的神经网络，第一个隐藏层中有 128 个神经元，第二个隐藏层中有 64 个神经元。输出层由单个 sigmoid 神经元组成

class FNN:
    def __init__(self):
        self.W1=None
        self.b1=None
        self.W2=None
        self.b2=None
        self.W3=None
        self.b3=None
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    def forward_prop(self,x):
        self.Z1=np.dot(self.W1,x)+self.b1
        self.A1=np.tanh(self.Z1)
        self.Z2=np.dot(self.W2,self.A1)+self.b2
        self.A2=np.tanh(self.Z2)
        self.Z3=np.dot(self.W3,self.A2)+self.b3
        self.A3=self.sigmoid(self.Z3)
        return self.A3
    def back_prop(self,x,y):
        self.forward_prop(x)
        m=x.shape[1]
        self.dZ3=self.A3-y
        self.dW3=np.dot(self.dZ3,self.A2.T)/m
        self.db3=np.sum(self.dZ3,axis=1,keepdims=True)/m
        self.dZ2=np.dot(self.W3.T,self.dZ3)*(1-self.A2**2)
        self.dW2=np.dot(self.dZ2,self.A1.T)/m
        self.db2=np.sum(self.dZ2,axis=1,keepdims=True)/m
        self.dZ1=np.dot(self.W2.T,self.dZ2)*(1-self.A1**2)
        self.dW1=np.dot(self.dZ1,x.T)/m
        self.db1=np.sum(self.dZ1,keepdims=True)/m
    def fit(self,x,y,epochs=100,learning_rate=0.01,plot=True,disp_loss=False):
        np.random.seed(4)
        self.W1=np.random.rand(128,x.shape[0])
        self.b1=np.zeros((128,1))
        self.W2=np.random.randn(64,128)
        self.b2=np.zeros((64,1))
        self.W3=np.random.randn(1,64)
        self.b3=np.zeros((1,1))
        m=x.shape[1]
        loss=[]
        for i in range(epochs):
            self.back_prop(x,y)
            self.W1-=learning_rate*self.dW1
            self.b1-=learning_rate*self.db1
            self.W2-=learning_rate*self.dW2
            self.b2-=learning_rate*self.db2
            self.W3-=learning_rate*self.dW3
            self.b3-=learning_rate*self.db3
            logprobs=y*np.log(self.A3)+(1-y)*np.log(1-self.A3)
            cost=-(np.sum(logprobs))/m
            loss.append(cost)
        e=np.arange(1,epochs+1)
        if plot:
            plt.plot(e,loss)
            plt.title('LOSS PLOT')
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.show()
        if disp_loss:
            print(loss)
    def predict(self,x):
        y=np.where(self.forward_prop(x)>=0.5,1,0)
        return y

F=FNN()
F.fit(x_train,y_train)
y_pred=F.predict(x_train)

输出

学习率：0.01

学习率：1