I'm trying to implement negative sampling in Keras. I've written the code below, which computes only the loss; once I have it up and running, I plan to add an extra output for the logits. Here is the code:
from keras.layers import Dense
from keras.layers import Embedding
from keras.layers import Input, Concatenate, Masking, Layer
from keras.layers import LSTM
from keras.models import Model
import numpy as np
from keras.utils import to_categorical
import tensorflow as tf
vocab_size = 20
n_features = 3
batch_size = 16
sequence_len = 10
embedding_size = 10
# Inputs
looked = Input((sequence_len,), dtype='int32', name='in_seq')
features = Input((sequence_len, n_features,), dtype='float', name='in_aux')
labels = Input((1,), dtype='int32', name='labels_')
looked_m = Masking(mask_value=0)(looked)
in_embed = Embedding(output_dim=embedding_size, input_dim=vocab_size, input_length=sequence_len)(looked_m)
in_merged = Concatenate()([in_embed, features])
la_lstm = LSTM(256)(in_merged)
la_dense1 = Dense(128)(la_lstm)
loss = MyLayer(10, vocab_size, mode='train', name='my_layer')([la_dense1, labels])  # MyLayer is defined below
model = Model(inputs=[looked, features, labels], outputs=[loss])
model.compile(loss=lambda loss, y_true: loss, optimizer='Adam')
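For context, Keras invokes a custom loss function as loss(y_true, y_pred), with the model's output arriving as the second argument; a minimal sketch of a pass-through loss under that convention:

def passthrough_loss(y_true, y_pred):
    # MyLayer already returns the per-example loss as the model output,
    # so the compiled loss only needs to forward y_pred.
    return y_pred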
Here is my custom layer:
class MyLayer(Layer):
    def __init__(self, num_sampled, num_classes, mode, **kwargs):
        self.num_sampled = num_sampled
        self.num_classes = num_classes
        self.mode = mode
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        dense_shape, classes_shape = input_shape
        self.kernel = self.add_weight(name='kernel',
                                      shape=(self.num_classes, dense_shape[1]),
                                      initializer='uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.num_classes,),
                                    initializer='uniform',
                                    trainable=True)  # Maybe zero
        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs_and_labels):
        inputs, labels = inputs_and_labels
        if self.mode == "train":
            loss = tf.nn.sampled_softmax_loss(
                weights=self.kernel,
                biases=self.bias,
                labels=labels,
                inputs=inputs,
                num_sampled=self.num_sampled,
                num_classes=self.num_classes,
                num_true=1)
        elif self.mode == "eval":
            logits = tf.matmul(inputs, tf.transpose(self.kernel))
            logits = tf.nn.bias_add(logits, self.bias)
            labels_one_hot = tf.one_hot(labels, self.num_classes)
            loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels_one_hot,
                logits=logits)
        return loss

    def compute_output_shape(self, input_shape):
        dense_shape, classes_shape = input_shape
        return (dense_shape[0],)
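To sanity-check the train branch in isolation, here is a standalone sketch of the shape contract tf.nn.sampled_softmax_loss expects, using the same toy sizes as above (graph-mode TF 1.x; the variable names are made up for the example):

import numpy as np
import tensorflow as tf

dim, n_classes, n_sampled, batch = 128, 20, 10, 16  # mirrors the constants above
w = tf.constant(np.random.rand(n_classes, dim), dtype=tf.float32)  # weights: [num_classes, dim]
b = tf.constant(np.zeros(n_classes), dtype=tf.float32)             # biases:  [num_classes]
x = tf.constant(np.random.rand(batch, dim), dtype=tf.float32)      # inputs:  [batch_size, dim]
lbl = tf.constant(np.random.randint(0, n_classes, (batch, 1)))     # labels:  [batch_size, num_true]

sampled_loss = tf.nn.sampled_softmax_loss(weights=w, biases=b, labels=lbl,
                                          inputs=x, num_sampled=n_sampled,
                                          num_classes=n_classes, num_true=1)
# sampled_loss is a [batch_size] tensor: one loss value per example.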
Everything works fine with dummy inputs:
# Test
y = np.arange(batch_size)
y_one_hot = to_categorical(y, vocab_size)
x_looked = np.array([np.random.choice(np.arange(vocab_size),
                                      sequence_len,
                                      replace=True) for _ in range(batch_size)])
x_features = np.random.rand(batch_size, sequence_len, n_features)
layer_name = 'my_layer'
intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict([x_looked, x_features, y]) # OK
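If I read compute_output_shape right, the intermediate output should be one loss value per example:

print(intermediate_output.shape)  # expected: (16,), i.e. (batch_size,)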
But when I train:
# Here comes the problem
model.fit([x_looked, x_features, y], y_one_hot)
I get:
Traceback (most recent call last):
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-af27c0c040b8>", line 102, in <module>
    model.fit([x_looked, x_features, y], y_one_hot)
  File "/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1689, in fit
    self._make_train_function()
  File "/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 990, in _make_train_function
    loss=self.total_loss)
  File "/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/keras/optimizers.py", line 440, in get_updates
    grads = self.get_gradients(loss, params)
  File "/anaconda3/lib/python3.6/site-packages/keras/optimizers.py", line 80, in get_gradients
    raise ValueError('An operation has `None` for gradient. '
ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
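For what it's worth, here is a quick way to see which trainable weights end up with a None gradient (a debugging sketch against the compiled model above, using graph-mode TF 1.x APIs):

import tensorflow as tf

# List each trainable weight and whether the compiled total loss
# produces a gradient for it.
grads = tf.gradients(model.total_loss, model.trainable_weights)
for weight, grad in zip(model.trainable_weights, grads):
    print(weight.name, 'grad is None' if grad is None else 'grad OK')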