I'm trying to understand what Keras's LSTM does under the hood, using the approach taken in https://github.com/greydanus/mr_london/blob/master/app/model/textgen.py.
I have a Sequential model with two LSTM layers followed by a dense output layer. I can access each layer's weights from the Keras model, and I'm trying to use those weights to replicate the model's predictions with numpy. However, whenever I set "units" to anything greater than 1, my output differs wildly from that of Keras's "model.predict()". On top of that, the shapes reported by Keras's "model.summary()" differ from the ones I get. My code is below.
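For reference, the per-timestep update I'm trying to reproduce (σ is Keras's hard sigmoid, ⊙ is elementwise multiplication; this is the standard Keras LSTM recurrence):

i_t = \sigma(x_t W_i + h_{t-1} U_i + b_i)
f_t = \sigma(x_t W_f + h_{t-1} U_f + b_f)
o_t = \sigma(x_t W_o + h_{t-1} U_o + b_o)
c_t = f_t \odot c_{t-1} + i_t \odot \tanh(x_t W_c + h_{t-1} U_c + b_c)
h_t = o_t \odot \tanh(c_t)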
Import libraries
from __future__ import division
import numpy as np
from numpy import array
from keras.models import Model, Sequential
from keras.layers import Input, Dense, LSTM
import keras.backend as K
np.random.seed(42)
A bunch of basic functions
def tanh(x):
    return np.tanh(x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Keras's default recurrent activation: clip(0.2*x + 0.5, 0, 1)
def hard_sigmoid(x):
    slope = 0.2
    shift = 0.5
    x = (x * slope) + shift
    x = np.clip(x, 0, 1)
    return x
def softmax_2D(X):
    # Row-wise softmax; subtract the row max for numerical stability
    maxes = np.amax(X, axis=1)
    maxes = maxes.reshape(maxes.shape[0], 1)
    e = np.exp(X - maxes)
    dist = e / np.sum(e, axis=1, keepdims=True)
    dist = np.float64(dist)
    return dist
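To sanity-check that these match Keras's own definitions, I compare them against the backend functions on random data (the atol is an arbitrary choice of mine):

# Compare the numpy activations against Keras's backend versions
z = np.random.randn(3, 4)
print(np.allclose(hard_sigmoid(z), K.eval(K.hard_sigmoid(K.variable(z))), atol=1e-6))
print(np.allclose(softmax_2D(z), K.eval(K.softmax(K.variable(z))), atol=1e-6))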
Helper function for the LSTM layers
def nsteps(xi, xf, xo, xc, hprev, Cprev, U_i, U_f, U_o, U_c):
    nsteps = xi.shape[1]  # number of timesteps n; inputs are batch-first, [1,n,m]
    output = np.zeros_like(xi)  # [1,n,m]
    memory = np.zeros_like(xi)  # [1,n,m]
    for t in range(nsteps):
        xi_t = xi[:,t,:] ; xf_t = xf[:,t,:] ; xc_t = xc[:,t,:] ; xo_t = xo[:,t,:]  # [1,m] for all
        i_t = hard_sigmoid(xi_t + np.dot(hprev, U_i))  # [1,m] + [1,m]*[m,m] -> [1,m]
        f_t = hard_sigmoid(xf_t + np.dot(hprev, U_f))  # [1,m] + [1,m]*[m,m] -> [1,m]
        o_t = hard_sigmoid(xo_t + np.dot(hprev, U_o))  # [1,m] + [1,m]*[m,m] -> [1,m]
        c_t = f_t*Cprev + i_t * np.tanh(xc_t + np.dot(hprev, U_c))  # elementwise, [1,m]
        h_t = o_t * np.tanh(c_t)  # elementwise, [1,m]
        output[:,t,:] = h_t ; memory[:,t,:] = c_t
        hprev = h_t  # [1,m]
        Cprev = c_t  # [1,m]
    return [output, memory]
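A standalone smoke test of this helper on hypothetical dummy arrays (n=3 timesteps, m=2 units), just to confirm the shapes:

# Smoke test: shapes in and out of nsteps
n, m = 3, 2
dummy = np.random.randn(1, n, m)
U = np.eye(m)
out, mem = nsteps(dummy, dummy, dummy, dummy, np.zeros((1, m)), np.zeros((1, m)), U, U, U, U)
print(out.shape, mem.shape)  # both (1, 3, 2)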
The main LSTM layer
def lstm_layer(X, units, i=0, seq=False):
    # Pull the trained weights for layer i out of the (global) Keras model
    kernel_weights = model.layers[i].get_weights()[0]
    recurrent_kernel_weights = model.layers[i].get_weights()[1]
    bias = model.layers[i].get_weights()[2]
    # Get the weights: Keras concatenates the gates in the order i, f, c, o
    # kernels
    kernel_i = kernel_weights[:, :units]
    kernel_f = kernel_weights[:, units: units * 2]
    kernel_c = kernel_weights[:, units * 2: units * 3]
    kernel_o = kernel_weights[:, units * 3:]
    # recurrent kernels
    h_i = recurrent_kernel_weights[:, :units]
    h_f = recurrent_kernel_weights[:, units: units * 2]
    h_c = recurrent_kernel_weights[:, units * 2: units * 3]
    h_o = recurrent_kernel_weights[:, units * 3:]
    # biases
    bias_i = bias[:units]
    bias_f = bias[units: units * 2]
    bias_c = bias[units * 2: units * 3]
    bias_o = bias[units * 3:]
    # Input contribution of every timestep, computed in one shot
    x_i = np.dot(X, kernel_i) + bias_i
    x_f = np.dot(X, kernel_f) + bias_f
    x_c = np.dot(X, kernel_c) + bias_c
    x_o = np.dot(X, kernel_o) + bias_o
    # Initial hidden & cell states are zeros
    h_tm1 = np.zeros((1, len(bias_i)))
    c_tm1 = np.zeros((1, len(bias_i)))
    [output, memory] = nsteps(x_i, x_f, x_o, x_c, h_tm1, c_tm1, h_i, h_f, h_o, h_c)
    if seq:
        return output
    else:
        output = output[:, -1, :]  # keep only the last timestep
        output = output.reshape(1, units)
        return output
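To debug layer by layer, each numpy layer can be compared against the corresponding intermediate activation from Keras itself, e.g. with a functional Model over the trained layers (a sketch, to be run after fitting the model below; the atol is arbitrary):

# Compare layer-0 numpy output with Keras's own layer-0 output
intermediate = Model(inputs=model.input, outputs=model.layers[0].output)
keras_h0 = intermediate.predict(x_train)
numpy_h0 = lstm_layer(x_train, units=units, i=0, seq=True)
print(np.allclose(keras_h0, numpy_h0, atol=1e-5))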
Now the dense layer
def dense(X, i=0):
    W = model.layers[i].get_weights()[0]
    b = model.layers[i].get_weights()[1]
    output = np.dot(X, W) + b
    return output
Now create the model
data_dim = 28
timesteps = 32
num_classes = 2
units = 5
Expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
model.add(LSTM(units, return_sequences=True, input_shape=(timesteps, data_dim)))
model.add(LSTM(units))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
Generate dummy training data
x_train = np.random.random((1, timesteps, data_dim))
x_train = np.where(x_train > 0.5, 1.0, 0.0)
y_train = np.random.random((1, num_classes))
y_train = np.where(y_train > 0.5, 1.0, 0.0)
Generate dummy validation data
x_val = np.random.random((1, timesteps, data_dim))
x_val = np.where(x_val > 0.5, 1.0, 0.0)
y_val = np.random.random((1, num_classes))
y_val = np.where(y_val > 0.5, 1.0, 0.0)
Fit the model
model.fit(x_train, y_train, batch_size=1, epochs=1, validation_data=(x_val, y_val))
Print the model summary
print(model.summary())
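As a cross-check on the summary, the raw weight arrays can be printed per layer; for an LSTM layer, Keras stores the kernel as (input_dim, 4*units), the recurrent kernel as (units, 4*units), and the bias as (4*units,), which is what the slicing in lstm_layer assumes:

# Print each layer's weight shapes to verify the slicing above
for idx, layer in enumerate(model.layers):
    print(idx, layer.name, [w.shape for w in layer.get_weights()])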
The numpy prediction model
def predict_i(X, units):
    print('Shape of input to LSTM layer 1:', X.shape)
    X = lstm_layer(X, units=units, i=0, seq=True)
    print('Shape of output from LSTM layer 1:', X.shape)
    X = lstm_layer(X, units=units, i=1, seq=False)
    print('Shape of output from LSTM layer 2:', X.shape)
    X = dense(X, i=2)
    print('Shape of output from dense layer:', X.shape)
    return softmax_2D(X)
Get predictions from the numpy model
predict_i(x_train, units)
Get predictions from the Keras model
model.predict(x_train)
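For a direct numeric comparison of the two (my own check, with an arbitrary tolerance):

# Side-by-side comparison of the numpy and Keras predictions
print(np.allclose(predict_i(x_train, units), model.predict(x_train), atol=1e-5))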
The predictions from the Keras model don't match those from the numpy model. Any help/feedback would be greatly appreciated.