# The model code
# Load the pretrained toxic-comment classifier and its tokenizer, then move
# the model to the target device (GPU if available).
# NOTE: the original line used typographic curly quotes (“ ”), which is a
# SyntaxError in Python — they must be plain straight quotes.
model_path = "martin-ha/toxic-comment-model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.to(device)
#The dataset code
class GetData2(Dataset):
    """Map-style Dataset that tokenizes raw texts on the fly for inference.

    Each item is a dict with fixed-length ``input_ids`` and ``attention_mask``
    1-D tensors, ready to be batched by a DataLoader and fed to the model.
    """

    # BUG FIX: the constructor was named ``init`` instead of ``__init__``,
    # so Python fell back to ``object.__init__`` and
    # ``GetData2(texts, tokenizer, max_length)`` raised
    # "TypeError: object.__init__() takes exactly one argument".
    def __init__(self, texts, tokenizer, max_length):
        self.texts = texts            # sequence of raw input strings
        self.tokenizer = tokenizer    # HF tokenizer exposing encode_plus
        self.max_length = max_length  # pad/truncate every sample to this length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Tokenize one text: pad to max_length so every sample in a batch
        # has the same shape, truncate anything longer.
        encoded_text = self.tokenizer.encode_plus(
            self.texts[idx],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            return_tensors="pt",
            truncation=True
        )
        # encode_plus returns (1, max_length) tensors; squeeze drops the
        # leading batch dim so the DataLoader can stack items itself.
        return {
            "input_ids": encoded_text["input_ids"].squeeze(),
            "attention_mask": encoded_text["attention_mask"].squeeze()
        }
# Creation of the Dataset
# NOTE: the original line used curly quotes around 'body' (a SyntaxError);
# they must be plain straight quotes.
test_2 = GetData2(list(df_pd['body']), tokenizer, 512)

# Creation of the DataLoader (these two lines were bare prose, missing '#',
# which is a SyntaxError). shuffle=False keeps predictions aligned with rows.
dataloader = DataLoader(test_2, batch_size=128, shuffle=False)

# Perform inference on GPU: move each batch to the model's device, run the
# forward pass without gradients, and collect softmax probabilities on CPU.
model.eval()
predictions = []  # BUG FIX: original read "predictions =" with no value (SyntaxError)
with torch.no_grad():
    for batch in dataloader:
        # Transfer input_ids / attention_mask to the same device as the model.
        batch = {key: value.to(device) for key, value in batch.items()}
        outputs = model(**batch)
        # Class probabilities per sample; dim=1 is the class dimension.
        probabilities = torch.softmax(outputs.logits, dim=1)
        predictions.extend(probabilities.cpu().numpy())
And this is the Task Manager screenshot:
I have tried almost everything, but for some reason this does not work. I have tried different code where I create my own PyTorch model and train and test it using the GPU. It works there, and it is in the same environment. I do not want to fine-tune; I just want to run the pretrained model on my GPU so that the predictions are faster. Any help would be appreciated. Thank you.