#seq2seq model implementation

1 messages · Page 1 of 1 (latest)

split yoke
#

Can someone help me with how to implement a decoder in Keras? I asked ChatGPT, but I don't know why it is using the decoder again in the for loop.
ChatGPT code:

# Define the inputs
# Three symbolic Keras inputs. `encoder_output_dim` is not defined anywhere in
# this snippet and has to exist before this line runs.
encoder_output = tf.keras.Input(shape=(None, encoder_output_dim))
# NOTE(review): shape=(None,) presumably means a variable-length sequence of
# token ids per example. Nothing here embeds them before the LSTM, which per
# the Keras docs expects a 3-D (batch, timesteps, features) input -- confirm.
decoder_input = tf.keras.Input(shape=(None,))
# Not consumed by any layer in this section; it only reappears as an input of
# `training_model` further down.
target_sequence = tf.keras.Input(shape=(None,))

# Define the decoder
# A single LSTM layer object. return_state=True makes the call return extra
# state values in addition to the output, which is why the call below unpacks
# three values. `decoder_hidden_dim` is not defined in this snippet.
decoder = tf.keras.layers.LSTM(units=decoder_hidden_dim, return_sequences=True, return_state=True)
# NOTE(review): `initial_state` receives the `encoder_output` tensor as-is. The
# Keras LSTM docs describe initial_state as a list of two state tensors
# (hidden and cell), so this likely needs the encoder's final states -- confirm.
decoder_output, _, _ = decoder(decoder_input, initial_state=encoder_output)

# Define the output layer
# Dense layer with softmax over `output_vocab_size` units (not defined in this
# snippet), applied to the decoder output to produce `output`.
output_layer = tf.keras.layers.Dense(units=output_vocab_size, activation='softmax')
output = output_layer(decoder_output)

# Define the decoding loop
# Step-by-step (autoregressive) generation. The loop calls the SAME `decoder`
# and `output_layer` objects that were created earlier, so it reuses that one
# decoder at every step; it does not build a second decoder.
max_output_length = ... # set the maximum output length
start_token = ... # set the start token
end_token = ... # set the end token
hidden_state = encoder_output
# Seed the sequence with one start token per batch row. `batch_size` is not
# defined in this snippet. This also rebinds `decoder_input`, which until now
# referred to the Keras Input defined earlier.
decoder_input = tf.expand_dims([start_token] * batch_size, 1)

for i in range(max_output_length):
    # Run the decoder on the tokens accumulated so far.
    # NOTE(review): only the second returned value is carried forward as
    # `hidden_state`; the third is discarded. If these are the LSTM hidden and
    # cell states, the next call gets an incomplete initial_state -- confirm.
    decoder_output, hidden_state, _ = decoder(decoder_input, initial_state=hidden_state)
    decoder_output = output_layer(decoder_output)
    # Greedy pick: index of the largest value along the last axis.
    next_token = tf.argmax(decoder_output, axis=-1)
    # Feed the prediction back in by appending it to the decoder input.
    # NOTE(review): `next_token` presumably has one entry per input timestep,
    # not just the newest step, and tf.argmax returns int64 by default, which
    # may not match the dtype of `decoder_input` -- confirm shapes and dtypes.
    decoder_input = tf.concat([decoder_input, next_token], axis=-1)

    # stop decoding if end token is generated
    # (the break only triggers when every element of `next_token` equals `end_token`)
    if tf.reduce_all(next_token == end_token):
        break

# Define the training and inference models
# NOTE(review): `encoder_input` is never defined in this snippet (only
# `encoder_output` is). Also, `decoder_input` was rebound in the decoding-loop
# section, so here it no longer refers to the Keras Input defined at the top
# -- confirm which tensors were intended.
training_model = tf.keras.Model(inputs=[encoder_input, decoder_input, target_sequence], outputs=output)
# NOTE(review): `output` is the tensor built before the decoding loop; nothing
# computed inside the loop is passed to this model -- confirm intent.
inference_model = tf.keras.Model(inputs=encoder_input, outputs=output)
soft violet
#

I don't entirely get your question, but I'm guessing you are asking about the autoregressive generation: the loop calls the decoder again at each step because it feeds the output back into the input to get the subsequent output.