The code below uses GPT-2 to compute the next-token probabilities at each time step for 8 iterations. However, I would like to use it with an encoder-decoder model such as T5, to display the probability of each word generated by the decoder conditioned on the input (the encoder). How can I convert the code to work with T5 and get the logits of the generated output instead of only the next word?
# Greedy step-by-step decoding that records, for every step, the current
# input text and the most probable next-token candidates with their
# probabilities. `tokenizer`, `model`, `torch` and `pd` are expected to be
# defined/imported earlier.
#
# NOTE(review): the forward call below passes only `input_ids`, which fits a
# decoder-only model. An encoder-decoder model such as T5 presumably needs
# the decoder sequence supplied separately -- confirm against that model's
# forward signature before reusing this loop.
input_ids = tokenizer("Some Input", return_tensors="pt").input_ids
iterations = []  # one dict (table row) per decoding step
n_steps = 8  # number of tokens to generate
choices_per_step = 5  # number of top candidates recorded at each step

with torch.no_grad():  # inference only; no gradients required
    for _ in range(n_steps):
        iteration = dict()
        iteration["Input"] = tokenizer.decode(
            input_ids[0], skip_special_tokens=True
        )
        output = model(input_ids=input_ids)

        # Select logits of the first batch and the last token and apply softmax
        next_token_logits = output.logits[0, -1, :]
        next_token_probs = torch.softmax(next_token_logits, dim=-1)
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True)

        # Store tokens with highest probabilities
        for choice_idx in range(choices_per_step):
            token_id = sorted_ids[choice_idx]
            token_prob = next_token_probs[token_id].cpu().numpy()
            token_choice = (
                f"{tokenizer.decode(token_id)} ({100 * token_prob:.2f}%)"
            )
            iteration[f"Choice {choice_idx+1}"] = token_choice

        # Append predicted next token to input; the indexing reshapes the
        # single best id so it can be concatenated along the last dimension.
        input_ids = torch.cat([input_ids, sorted_ids[None, 0, None]], dim=-1)
        iterations.append(iteration)

# One row per step: the input text plus the "Choice N" columns.
pd.DataFrame(iterations)