I am using the following code to calculate the perplexity of sentences, and I'd like to know whether the resulting score is normalized by sentence length. If not, what do I need to change to normalize it?
Thanks!
import sys

import numpy as np
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained model (weights) and switch to evaluation mode
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

# Load pre-trained model tokenizer (vocabulary)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

def score(sentence):
    tokenize_input = tokenizer.encode(sentence)
    tensor_input = torch.tensor([tokenize_input])
    # Passing labels makes the model return the cross-entropy loss
    # as the first element of its output
    with torch.no_grad():
        loss = model(tensor_input, labels=tensor_input)[0]
    return np.exp(loss.detach().numpy())

if __name__ == '__main__':
    for line in sys.stdin:
        if line.strip() != '':
            print(line.strip() + '\t' + str(score(line.strip())))
        else:
            break
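For reference, here is a sketch that makes the per-token normalization explicit, by gathering the log-probability of each predicted token and dividing the summed negative log-likelihood by the number of predictions. The helper name normalized_score is my own, not part of transformers, and it assumes the same gpt2 checkpoint as above; since the built-in loss is (as far as I know) the mean cross-entropy over the predicted tokens, this should give the same value as np.exp(loss) in score.

import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer, GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

def normalized_score(sentence):
    # Hypothetical helper: perplexity with the length normalization done by hand
    input_ids = torch.tensor([tokenizer.encode(sentence)])
    with torch.no_grad():
        # Without labels, the first output element is the logits,
        # shaped (batch, seq_len, vocab_size)
        logits = model(input_ids)[0]
    # Position t predicts token t+1, so shift logits and labels by one
    shift_logits = logits[:, :-1, :]
    shift_labels = input_ids[:, 1:]
    log_probs = F.log_softmax(shift_logits, dim=-1)
    token_log_probs = log_probs.gather(-1, shift_labels.unsqueeze(-1)).squeeze(-1)
    # Mean negative log-likelihood per predicted token, exponentiated
    return torch.exp(-token_log_probs.mean()).item()

Taking the mean (rather than the sum) of the negative log-likelihoods is the step that normalizes by length and makes the score comparable across sentences of different lengths.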