I'm trying to understand what Keras's LSTM does under the hood, using the approach taken in https://github.com/greydanus/mr_london/blob/master/app/model/textgen.py.
I have a Sequential model with two LSTM layers followed by a dense output layer. I can access each layer's weights from the Keras model, and I'm trying to use those weights to replicate the model's predictions with numpy. However, whenever I set "units" to anything greater than 1, my output differs wildly from that of Keras's "model.predict()". On top of that, the shapes reported by Keras's "model.summary()" differ from the ones I get. My code is below.
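For reference, the per-timestep update I'm trying to reproduce (σ is Keras's hard sigmoid, ⊙ is elementwise multiplication; this is the standard Keras LSTM recurrence):

i_t = \sigma(x_t W_i + h_{t-1} U_i + b_i)
f_t = \sigma(x_t W_f + h_{t-1} U_f + b_f)
o_t = \sigma(x_t W_o + h_{t-1} U_o + b_o)
c_t = f_t \odot c_{t-1} + i_t \odot \tanh(x_t W_c + h_{t-1} U_c + b_c)
h_t = o_t \odot \tanh(c_t)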
Import libraries
from __future__ import division
import numpy as np
from numpy import array
from keras.models import Model, Sequential
from keras.layers import Input, Dense, LSTM
import keras.backend as K
np.random.seed(42)
A bunch of basic functions
def tanh(x):
    return np.tanh(x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Keras's default recurrent activation: clip(0.2*x + 0.5, 0, 1)
def hard_sigmoid(x):
    slope = 0.2
    shift = 0.5
    x = (x * slope) + shift
    x = np.clip(x, 0, 1)
    return x
def softmax_2D(X):
    # Row-wise softmax; subtract the row max for numerical stability
    maxes = np.amax(X, axis=1)
    maxes = maxes.reshape(maxes.shape[0], 1)
    e = np.exp(X - maxes)
    dist = e / np.sum(e, axis=1, keepdims=True)
    dist = np.float64(dist)
    return dist
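To sanity-check that these match Keras's own definitions, I compare them against the backend functions on random data (the atol is an arbitrary choice of mine):

# Compare the numpy activations against Keras's backend versions
z = np.random.randn(3, 4)
print(np.allclose(hard_sigmoid(z), K.eval(K.hard_sigmoid(K.variable(z))), atol=1e-6))
print(np.allclose(softmax_2D(z), K.eval(K.softmax(K.variable(z))), atol=1e-6))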
Helper function for the LSTM layers
def nsteps(xi, xf, xo, xc, hprev, Cprev, U_i, U_f, U_o, U_c):
    nsteps = xi.shape[1]  # number of timesteps n; inputs are batch-first, [1,n,m]
    output = np.zeros_like(xi)  # [1,n,m]
    memory = np.zeros_like(xi)  # [1,n,m]
    for t in range(nsteps):
        xi_t = xi[:,t,:] ; xf_t = xf[:,t,:] ; xc_t = xc[:,t,:] ; xo_t = xo[:,t,:]  # [1,m] for all
        i_t = hard_sigmoid(xi_t + np.dot(hprev, U_i))  # [1,m] + [1,m]*[m,m] -> [1,m]
        f_t = hard_sigmoid(xf_t + np.dot(hprev, U_f))  # [1,m] + [1,m]*[m,m] -> [1,m]
        o_t = hard_sigmoid(xo_t + np.dot(hprev, U_o))  # [1,m] + [1,m]*[m,m] -> [1,m]
        c_t = f_t*Cprev + i_t * np.tanh(xc_t + np.dot(hprev, U_c))  # elementwise, [1,m]
        h_t = o_t * np.tanh(c_t)  # elementwise, [1,m]
        output[:,t,:] = h_t ; memory[:,t,:] = c_t
        hprev = h_t  # [1,m]
        Cprev = c_t  # [1,m]
    return [output, memory]
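A standalone smoke test of this helper on hypothetical dummy arrays (n=3 timesteps, m=2 units), just to confirm the shapes:

# Smoke test: shapes in and out of nsteps
n, m = 3, 2
dummy = np.random.randn(1, n, m)
U = np.eye(m)
out, mem = nsteps(dummy, dummy, dummy, dummy, np.zeros((1, m)), np.zeros((1, m)), U, U, U, U)
print(out.shape, mem.shape)  # both (1, 3, 2)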
The main LSTM layer
def lstm_layer(X, units, i=0, seq=False):
    # Pull the trained weights for layer i out of the (global) Keras model
    kernel_weights = model.layers[i].get_weights()[0]
    recurrent_kernel_weights = model.layers[i].get_weights()[1]
    bias = model.layers[i].get_weights()[2]
    # Get the weights: Keras concatenates the gates in the order i, f, c, o
    # kernels
    kernel_i = kernel_weights[:, :units]
    kernel_f = kernel_weights[:, units: units * 2]
    kernel_c = kernel_weights[:, units * 2: units * 3]
    kernel_o = kernel_weights[:, units * 3:]
    # recurrent kernels
    h_i = recurrent_kernel_weights[:, :units]
    h_f = recurrent_kernel_weights[:, units: units * 2]
    h_c = recurrent_kernel_weights[:, units * 2: units * 3]
    h_o = recurrent_kernel_weights[:, units * 3:]
    # biases
    bias_i = bias[:units]
    bias_f = bias[units: units * 2]
    bias_c = bias[units * 2: units * 3]
    bias_o = bias[units * 3:]
    # Input contribution of every timestep, computed in one shot
    x_i = np.dot(X, kernel_i) + bias_i
    x_f = np.dot(X, kernel_f) + bias_f
    x_c = np.dot(X, kernel_c) + bias_c
    x_o = np.dot(X, kernel_o) + bias_o
    # Initial hidden & cell states are zeros
    h_tm1 = np.zeros((1, len(bias_i)))
    c_tm1 = np.zeros((1, len(bias_i)))
    [output, memory] = nsteps(x_i, x_f, x_o, x_c, h_tm1, c_tm1, h_i, h_f, h_o, h_c)
    if seq:
        return output
    else:
        output = output[:, -1, :]  # keep only the last timestep
        output = output.reshape(1, units)
        return output
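To debug layer by layer, each numpy layer can be compared against the corresponding intermediate activation from Keras itself, e.g. with a functional Model over the trained layers (a sketch, to be run after fitting the model below; the atol is arbitrary):

# Compare layer-0 numpy output with Keras's own layer-0 output
intermediate = Model(inputs=model.input, outputs=model.layers[0].output)
keras_h0 = intermediate.predict(x_train)
numpy_h0 = lstm_layer(x_train, units=units, i=0, seq=True)
print(np.allclose(keras_h0, numpy_h0, atol=1e-5))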
Now the dense layer
def dense(X, i=0):
    W = model.layers[i].get_weights()[0]
    b = model.layers[i].get_weights()[1]
    output = np.dot(X, W) + b
    return output
Now create the model
data_dim = 28
timesteps = 32
num_classes = 2
units = 5
Expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
model.add(LSTM(units, return_sequences=True, input_shape=(timesteps, data_dim)))
model.add(LSTM(units))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
Generate dummy training data
x_train = np.random.random((1, timesteps, data_dim))
x_train = np.where(x_train > 0.5, 1.0, 0.0)
y_train = np.random.random((1, num_classes))
y_train = np.where(y_train > 0.5, 1.0, 0.0)
Generate dummy validation data
x_val = np.random.random((1, timesteps, data_dim))
x_val = np.where(x_val > 0.5, 1.0, 0.0)
y_val = np.random.random((1, num_classes))
y_val = np.where(y_val > 0.5, 1.0, 0.0)
Fit the model
model.fit(x_train, y_train, batch_size=1, epochs=1, validation_data=(x_val, y_val))
Print the model summary
print(model.summary())
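As a cross-check on the summary, the raw weight arrays can be printed per layer; for an LSTM layer, Keras stores the kernel as (input_dim, 4*units), the recurrent kernel as (units, 4*units), and the bias as (4*units,), which is what the slicing in lstm_layer assumes:

# Print each layer's weight shapes to verify the slicing above
for idx, layer in enumerate(model.layers):
    print(idx, layer.name, [w.shape for w in layer.get_weights()])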
The numpy prediction model
def predict_i(X, units):
    print('Shape of input to LSTM layer 1:', X.shape)
    X = lstm_layer(X, units=units, i=0, seq=True)
    print('Shape of output from LSTM layer 1:', X.shape)
    X = lstm_layer(X, units=units, i=1, seq=False)
    print('Shape of output from LSTM layer 2:', X.shape)
    X = dense(X, i=2)
    print('Shape of output from dense layer:', X.shape)
    return softmax_2D(X)
Get predictions from the numpy model
predict_i(x_train, units)
Get predictions from the Keras model
model.predict(x_train)
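For a direct numeric comparison of the two (my own check, with an arbitrary tolerance):

# Side-by-side comparison of the numpy and Keras predictions
print(np.allclose(predict_i(x_train, units), model.predict(x_train), atol=1e-5))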
The predictions from the Keras model don't match those from the numpy model. Any help/feedback would be greatly appreciated.