Hi,
I’m trying to use reformer-enwik8 to output prob for next character
Here is my code
import torch
import torch.nn.functional as F
from transformers import ReformerModelWithLMHead
def encode(list_of_strings, pad_token_id=0):
max_length = max([len(string) for string in list_of_strings])
# create emtpy tensors
attention_masks = torch.zeros((len(list_of_strings), max_length), dtype=torch.long)
input_ids = torch.full((len(list_of_strings), max_length), pad_token_id, dtype=torch.long)
for idx, string in enumerate(list_of_strings):
# make sure string is in byte format
if not isinstance(string, bytes):
string = str.encode(string)
input_ids[idx, :len(string)] = torch.tensor([x + 2 for x in string])
attention_masks[idx, :len(string)] = 1
return input_ids, attention_masks
# Decoding
def decode(outputs_ids):
decoded_outputs = []
for output_ids in outputs_ids.tolist():
# transform id back to char IDs < 2 are simply transformed to ""
decoded_outputs.append("".join([chr(x - 2) if x > 1 else "" for x in output_ids]))
return decoded_outputs
def main():
model = ReformerModelWithLMHead.from_pretrained("google/reformer-enwik8")
ids, masks = encode(["In 1965, Brooks left IBM to found the Department of"])
logits = model(ids, masks)["logits"]
output = decode(torch.argmax(logits, dim=-1))
the output is [’ t 96 a n aeroha o ahfaorsoithint nonehonohtro’], which does not seem to make sense. Actually I don’t know how to get the correct logtis from the model, if there are any thing wrong in the code, please tell me
Thanks!