I’ve been trying to get top_p and top_k decoding to work with a translation model (following How to generate text: using different decoding methods for language generation with Transformers), but I don’t get enough variations, whereas if I use beam search, it works.
I created a script to test this (below), which you can run and the output that I get is at the bottom.
Am I missing something?
from transformers import MarianTokenizer, AutoModelForSeq2SeqLM
NUMBER_OF_BEAMS = 100
NUMBER_OF_SEQUENCES = 15
MAX_LENGTH = 60
TOP_K = 100
TOP_P = 0.9
forward_model_name = 'Helsinki-NLP/opus-mt-en-es'
print("Loading tokenizers and models started...")
tokenizer = MarianTokenizer.from_pretrained(forward_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(forward_model_name)
def decode_output_vectors(outputs, tokenizer):
translations = []
for i in range(len(outputs)):
translation = tokenizer.decode(outputs[i], skip_special_tokens=True)
translations.append(translation)
return translations
def get_translation_with_beams(input_text,
model,
tokenizer,
number_of_beams=NUMBER_OF_BEAMS,
num_of_returning_seqs=NUMBER_OF_SEQUENCES):
forward_input_ids = tokenizer.encode(input_text, return_tensors="pt")
output_vectors = model.generate(input_ids=forward_input_ids,
num_beams=number_of_beams,
num_return_sequences=num_of_returning_seqs)
return decode_output_vectors(output_vectors, tokenizer)
def get_translation_with_top_p(input_text,
model,
tokenizer,
top_p=TOP_P,
num_of_returning_seqs=NUMBER_OF_SEQUENCES):
forward_input_ids = tokenizer.encode(input_text, return_tensors="pt")
output_vectors = model.generate(input_ids=forward_input_ids,
do_sample=True,
top_p=top_p,
num_return_sequences=num_of_returning_seqs)
return decode_output_vectors(output_vectors, tokenizer)
def get_translation_with_top_k(input_text,
model,
tokenizer,
top_k=TOP_K,
num_of_returning_seqs=NUMBER_OF_SEQUENCES):
forward_input_ids = tokenizer.encode(input_text, return_tensors="pt")
output_vectors = model.generate(input_ids=forward_input_ids,
do_sample=True,
top_k=top_k,
num_return_sequences=num_of_returning_seqs)
return decode_output_vectors(output_vectors, tokenizer)
def print_translation(title, parameter_value, translations):
print("===============================")
print(title + " (" + str(parameter_value) + ")")
print("===============================")
for translation in list(set(translations)):
print(translation)
print()
print()
input_text = "This is such a special day in my life."
beam_translations = get_translation_with_beams(input_text, model, tokenizer)
print_translation("BEAM TRANSLATIONS", NUMBER_OF_BEAMS, beam_translations)
top_p_translations = get_translation_with_top_p(input_text, model, tokenizer)
print_translation("TOP P TRANSLATIONS", TOP_P, top_p_translations)
top_k_translations = get_translation_with_top_k(input_text, model, tokenizer)
print_translation("TOP K TRANSLATIONS", TOP_K, top_k_translations)
I get the following output
===============================
BEAM TRANSLATIONS (beams = 100)
Hoy es un día tan especial en mi vida.
Este es un día tan especial en mi vida.
Este día es muy especial en mi vida.
Este es un día muy especial en mi vida.
Este es un día especial en mi vida.
Este es un día muy especial de mi vida.
Esto es un día tan especial en mi vida.
Es un día especial en mi vida.
Este es un día tan especial de mi vida.
Este día es tan especial en mi vida.
Es un día tan especial en mi vida.
Éste es un día tan especial en mi vida.
Es un día muy especial en mi vida.
Es un día tan especial de mi vida.
Éste es un día muy especial en mi vida.
===============================
TOP P TRANSLATIONS (p = 0.9)
Este es un día tan especial en mi vida.
===============================
TOP K TRANSLATIONS (k = 100)
Este es un día tan especial en mi vida.