My own neural network implementation severely underfits the data

data-mining  machine-learning  python  neural-network  deep-learning  data-science-model
2022-02-18 04:38:43

I am trying to implement a basic deep neural network algorithm from scratch for a classification problem. I have been testing it on the iris dataset, but my implementation gives very poor results: it severely underfits the data. The best accuracy I have achieved is 66%, and the worst goes as low as 0%. On top of that, the results vary widely from run to run, even though I set a random seed.
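
For reference, the seeding I mean is along these lines (a minimal sketch; the seed value itself is arbitrary, and I am assuming numpy's global RNG here, since the weights below are drawn with np.random.uniform):

import numpy as np

# Fix numpy's global RNG so that np.random.uniform, which initializes
# the weights in fit() below, produces the same draws on every run.
# The seed value 0 is arbitrary / hypothetical.
np.random.seed(0)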

I chose the tanh activation function, a learning rate of 0.01, softmax activation for the output layer, and StandardScaler normalization for the input features.
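
Concretely, the input normalization is scikit-learn's StandardScaler (a minimal sketch of the same fit_transform call that fit() makes below):

from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Standardize each feature to zero mean and unit variance;
# fit() below does the same thing via S_s.fit_transform(X).
X = load_iris().data
X_scaled = StandardScaler().fit_transform(X)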

So I would like to know whether I got any of the math wrong, or missed some essential part of this algorithm. Any suggestions or corrections are much appreciated. Thank you very much.

Here is the code:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

data = load_iris()
X = data.data
y = data.target

class Neural_Network:

    def __init__(self, n_hlayers, n_nodes, lr):

        # No. of hidden layers
        self.n_layers = n_hlayers

        # No. of nodes in each of the hidden layers
        self.n_nodes = n_nodes

        # Learning rate of the algorithm
        self.lr = lr

        # Dictionary to hold the node values of all the layers
        self.layers = {}

        # Dictionary to hold the weight values of all the layers
        self.weights = {}

    def _softmax(self, values):
        '''Function to perform softmax activation on the node values;
        returns probabilities of each class'''

        exp_scores = np.exp(values)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return probs

    def _derivate_tanh(self, values):
        '''Function that computes the derivative of the tanh activation,
        given the already-activated values'''

        # Derivative of tanh x is 1 - tanh^2 x
        return 1 - np.power(values, 2)

    def fit(self, X, y):
        '''This function constructs a Neural Network with the given hyperparameters and then runs it for
        the given no. of epochs. The no. of nodes is the same in all hidden layers for simplicity's sake.
        returns: None / NA'''

        print('Fitting the data ')

        try:
            X = np.array(X)
            y = np.array(y)
        except Exception:
            print('Could not make sense of the inputs')

        # No. of examples and the dimensions of each sample
        self.num_examples, self.features = X.shape

        # Setting default layers

        # Input layer
        self.layers['input'] = np.zeros(shape=[1, self.features])

        # Hidden layers
        for i in range(1, (self.n_layers + 1)):
            self.layers['layer-1' + str(i)] = np.zeros(shape=[1, self.n_nodes])

        # Output layer
        self.layers['output'] = np.zeros(shape=[1, len(np.unique(y))])

        # Setting random weights
        for i in range(1, (self.n_layers + 2)):

            # Weights for the first layer
            if i == 1:
                self.weights['weight-1' + str(i)] = np.random.uniform(low=0.1, high=0.2, size=[self.features, self.n_nodes])

            # Weights for the hidden layers
            elif i < (self.n_layers + 1):
                self.weights['weight-1' + str(i)] = np.random.uniform(low=0.1, high=0.2, size=[self.n_nodes, self.n_nodes])

            # Weights for the output layer
            else:
                self.weights['weight-1' + str(i)] = np.random.uniform(low=0.1, high=0.2, size=[self.n_nodes, len(np.unique(y))])

        # No. of epochs taken from the user
        epochs = int(input('Please choose no.of epochs: '))

        # Standard Scaler to normalize the input data
        S_s = StandardScaler()
        self.X = S_s.fit_transform(X)
        self.y = y.reshape(self.num_examples, 1)

        for ep in range(epochs):

            # Forward propagating
            self._Forward_Propogate()

            if ep % 100 == 0:
                # Calculating the accuracy of the predictions
                self.acc = np.sum(self.y.flatten() == np.argmax(self.layers['output'], axis=1)) / self.num_examples
                print('Accuracy in epoch ', ep, ' is :', self.acc)

            # Backward propagating
            self._Backward_Propogation()

    def _Forward_Propogate(self):
        '''This function performs forward propagation of the input data through the hidden layers and the output layer.
        activations: tanh for all layers except the output layer, softmax for the output layer
        returns: None / NA'''

        # Feeding the input layer the normalized inputs
        self.layers['input'] = self.X

        # Forward propagating
        for i in range(1, len(self.layers.keys())):

            # Input layer dot-product with the first set of weights
            if i == 1:
                dp = self.layers['input'].dot(self.weights['weight-1' + str(i)])

                # Storing the result in the first hidden layer after performing tanh activation on the values
                self.layers['layer-1' + str(i)] = np.tanh(dp)

            # Hidden layers' dot-product with the weights for the hidden layers
            elif i != (len(self.layers.keys()) - 1):
                dp = self.layers['layer-1' + str(i - 1)].dot(self.weights['weight-1' + str(i)])

                # Storing the result in the next hidden layer after performing tanh activation on the values
                self.layers['layer-1' + str(i)] = np.tanh(dp)

            # Dot-product of the last hidden layer with the last set of weights
            else:
                dp = self.layers['layer-1' + str(i - 1)].dot(self.weights['weight-1' + str(i)])

                # Storing the result in the output layer after performing softmax activation on the values
                self.layers['output'] = self._softmax(dp)

    def _Backward_Propogation(self):
        '''This function performs back propagation using plain/naive gradient descent, starting from the weights
        of the output layer and going back through the hidden layers to the input-layer weights.
        returns: None / NA'''

        # Dictionary to hold delta / error values of each layer
        self.delta = {}

        # Dictionary to hold gradient / slope values of each layer
        self.gradients = {}

        # Calculating the error
        error = self.y - self.layers['output']

        # Adjusting the weights of the network, starting from the weights of the output layer
        for i in reversed(range(1, len(self.weights.keys()) + 1)):

            # Adjusting the weights for the last layer
            if i == len(self.weights.keys()):

                # Delta for the output layer weights
                self.delta['delta_out'] = error * self.lr

                # Gradient or slope for the last layer's weights
                self.gradients['grad_out'] = self.layers['layer-1' + str(i - 1)].T.dot(self.delta['delta_out'])

                # Adjusting the original weights for the output layer
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (self.lr * self.gradients['grad_out'])

            # Adjusting the weights for the last-but-one layer
            elif i == len(self.weights.keys()) - 1:

                # Delta / error values for the weights of the first hidden layer as seen from the output layer
                self.delta['delta_1' + str(i)] = self.delta['delta_out'].dot(self.weights['weight-1' + str(i + 1)].T) * self._derivate_tanh(self.layers['layer-1' + str(i)])

                # Gradient / slope for the weights of the first hidden layer as seen from the output layer
                self.gradients['grad_1' + str(i)] = self.layers['layer-1' + str(i - 1)].T.dot(self.delta['delta_1' + str(i)])

                # Adjusting the weights of the last-but-one layer
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (self.lr * self.gradients['grad_1' + str(i)])

            # Adjusting the weights for all other hidden layers
            elif i > 1:

                # Delta / error values for the weights in the hidden layers
                self.delta['delta_1' + str(i)] = self.delta['delta_1' + str(i + 1)].dot(self.weights['weight-1' + str(i + 1)]) * self._derivate_tanh(self.layers['layer-1' + str(i)])

                # Gradient / slope values for the weights of the hidden layers
                self.gradients['grad_1' + str(i)] = self.layers['layer-1' + str(i - 1)].T.dot(self.delta['delta_1' + str(i)])

                # Adjusting the weights of the hidden layer
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (self.lr * self.gradients['grad_1' + str(i)])

            # Adjusting the weights which are matrix-multiplied with the input layer
            else:

                # Delta / error values for the weights that come after the input layer
                self.delta['delta_inp'] = self.delta['delta_1' + str(i + 1)].dot(self.weights['weight-1' + str(i + 1)]) * self._derivate_tanh(self.layers['layer-1' + str(i)])

                # Gradient / slope values for the weights that come after the input layer
                self.gradients['grad_1' + str(i)] = self.layers['input'].T.dot(self.delta['delta_inp'])

                # Adjusting the weights
                self.weights['weight-1' + str(i)] = self.weights['weight-1' + str(i)] - (self.lr * self.gradients['grad_1' + str(i)])

Here is a sample result:

ob = Neural_Network(5, 50, 0.01)
ob.fit(X, y)

Please choose no.of epochs: 800
Accuracy in epoch  0  is : 0.17333333333333334
Accuracy in epoch  100  is : 0.18
Accuracy in epoch  200  is : 0.18
Accuracy in epoch  300  is : 0.18
Accuracy in epoch  400  is : 0.18
Accuracy in epoch  500  is : 0.18
Accuracy in epoch  600  is : 0.18
Accuracy in epoch  700  is : 0.18