I am writing an encoder-decoder architecture with Bahdanau attention using tf.keras with TensorFlow 2.0. Below is my code. It works with TensorFlow 1.15 but raises an error in 2.0. You can see the code in the colab notebook here. Can you tell me what is wrong with the code?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, Conv2D, BatchNormalization, Activation, Dropout, GRU, Embedding
from tensorflow.keras.models import Model
from tensorflow.keras import activations
from tensorflow.keras.layers import Layer
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.keras.layers import GRU, concatenate, Lambda
ENCODER_SEQ_LEN = 30
DECODER_SEQ_LEN = 20
VOCAB_SIZE = 500
units = 16
tf.keras.backend.clear_session()
class Encoder(Model):
    def __init__(self, vocab_size, embedding_dim, input_length, units):
        super(Encoder, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.input_length = input_length
        self.units = units
        self.embedding = Embedding(input_dim=VOCAB_SIZE, output_dim=50, input_length=self.input_length,
                                   mask_zero=False, name="embedding_layer_encoder")
        self.gru = GRU(self.units, return_state=True, return_sequences=True, name="Encoder_GRU")

    @tf.function
    def call(self, inputs, training=True):
        x_embedd = self.embedding(inputs)
        gru_output, gru_state = self.gru(x_embedd)
        return gru_output, gru_state

class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query (hidden) shape == (batch_size, hidden_size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
        # the time axis is added so the addition in the score broadcasts
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we apply self.V to the score
        # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))

        # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector

class onestepDecoder(Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, att_units):
        super(onestepDecoder, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dec_units = dec_units
        self.att_units = att_units
        self.embedd = Embedding(input_dim=self.vocab_size, output_dim=self.embedding_dim,
                                input_length=1, mask_zero=False, name="Decoder_Embedding_layer")
        self.att_layer = BahdanauAttention(units=self.att_units)  # name='Attention'
        self.dense = Dense(self.vocab_size, activation="softmax", name="DenseOut")
        self.gru = GRU(units=self.dec_units, return_state=True, name="DecGRU")

    @tf.function
    def call(self, input_decoder, input_state, encoder_outputs, training=True):
        x_embedd = self.embedd(input_decoder)
        context_vector = self.att_layer(input_state, encoder_outputs)
        concat = tf.concat([tf.expand_dims(context_vector, 1), x_embedd], axis=-1)
        decoder_output, Decoder_state = self.gru(concat, initial_state=input_state)
        output = self.dense(decoder_output)
        return (output, Decoder_state)

class Decoder(Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, att_units):
        super(Decoder, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dec_units = dec_units
        self.att_units = att_units
        self.stepdec = onestepDecoder(self.vocab_size, self.embedding_dim, self.dec_units, self.att_units)

    @tf.function
    def call(self, input_decoder, input_state, encoder_outputs):
        all_outputs = tf.TensorArray(tf.float32, size=input_decoder.shape[1], name="output_arrays")
        for timestep in range(input_decoder.shape[1]):
            output, input_state = self.stepdec(input_decoder[:, timestep:timestep+1], input_state, encoder_outputs)
            all_outputs = all_outputs.write(timestep, output)
        all_outputs = tf.transpose(all_outputs.stack(), [1, 0, 2])
        return all_outputs

encoder_input = Input(shape=(ENCODER_SEQ_LEN,), name='encoder_input_final')
decoder_input = Input(shape=(DECODER_SEQ_LEN,), name="Decoder_inout_final")
encoder = Encoder(vocab_size=VOCAB_SIZE, embedding_dim=50, input_length=ENCODER_SEQ_LEN, units=16)
x_gru_out, x_gru_state = encoder(encoder_input)
decoder = Decoder(vocab_size=VOCAB_SIZE, embedding_dim=50, dec_units=16, att_units=20)
all_outputs = decoder(decoder_input, x_gru_state, x_gru_out)
encoder_decoder = Model([encoder_input, decoder_input], outputs=all_outputs)
encoder_decoder.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
x = np.random.randint(0, 499, size=(2000, ENCODER_SEQ_LEN))
y = np.random.randint(0, 499, size=(2000, DECODER_SEQ_LEN))
encoder_decoder.fit(x=[x, y], y=y, epochs=1, verbose=1, batch_size=32)
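
A quick shape check of the attention layer alone (a sketch; the batch size of 4 and the random inputs are arbitrary) runs fine on plain eager tensors:

# Shape probe of the attention layer alone, outside the Keras functional wiring.
att_check = BahdanauAttention(units=20)
dummy_state = tf.random.normal((4, units))                     # decoder state: (batch, 16)
dummy_enc_out = tf.random.normal((4, ENCODER_SEQ_LEN, units))  # encoder outputs: (batch, 30, 16)
context = att_check(dummy_state, dummy_enc_out)
print(context.shape)  # expected: (4, 16)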
The full model, however, fails at encoder_decoder.fit with the following error:

TypeError                                 Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     60                                         op_name, inputs, attrs,
---> 61                                         num_outputs)
     62   except core._NotOkStatusException as e:

TypeError: An op outside of the function building code is being passed
a "Graph" tensor. It is possible to have Graph tensors
leak out of the function building context by including a
tf.init_scope in your function building code.
For example, the following function will fail:
  @tf.function
  def has_init_scope():
    my_constant = tf.constant(1.)
    with tf.init_scope():
      added = my_constant * 2
The graph tensor has name: keras_learning_phase:0

During handling of the above exception, another exception occurred:

_SymbolicException                        Traceback (most recent call last)
11 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     73       raise core._SymbolicException(
     74           "Inputs to eager execution function cannot be Keras symbolic "
---> 75           "tensors, but found {}".format(keras_symbolic_tensors))
     76     raise e
     77   # pylint: enable=protected-access

_SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found []
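
For reference, the sub-models can also be probed directly on concrete tensors rather than the symbolic Input tensors (a rough sketch only; it bypasses model.fit, so I am not sure it exercises the failing path):

# Rough eager check of the sub-models on concrete slices of x and y
# (sketch; bypasses the symbolic Inputs and fit()).
enc_out_chk, enc_state_chk = encoder(tf.constant(x[:4]))               # (4, 30, 16), (4, 16)
dec_out_chk = decoder(tf.constant(y[:4]), enc_state_chk, enc_out_chk)  # (4, 20, 500)
print(enc_out_chk.shape, enc_state_chk.shape, dec_out_chk.shape)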