Hi Folks,

I have started learning and implementing models on Hugging Face. I was trying to fine-tune a T5 model on a custom dataset, but I am getting this error:
```
--> 58     return data.pin_memory(device)
    59 elif isinstance(data, (str, bytes)):
    60     return data

RuntimeError: cannot pin 'torch.cuda.LongTensor' only dense CPU tensors can be pinned
```
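From the traceback it looks like the failure happens in the DataLoader's pin-memory step. If I understand it correctly, this snippet (my attempt at a minimal repro, separate from my training script) raises the same error:

```python
import torch

# Pinning only works for dense CPU tensors, so pinning a CUDA tensor fails.
t = torch.ones(2, dtype=torch.long, device="cuda")
t.pin_memory()  # RuntimeError: cannot pin 'torch.cuda.LongTensor' ...
```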
Here is my code:

```python
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

tokenizer = AutoTokenizer.from_pretrained('t5-small')

def create_dataset_item(text, labels, model_name='t5-small'):
    input_text = "generate text: " + text
    target_text = labels
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=1024,
    )
    lb = tokenizer(
        target_text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    inputs['labels'] = lb['input_ids']
    return {
        'input_ids': inputs['input_ids'].squeeze().to('cuda'),
        'attention_mask': inputs['attention_mask'].squeeze().to('cuda'),
        'labels': inputs['labels'].squeeze().to('cuda')
    }

def create_custom_dataset(data, model_name='t5-small'):
    dataset = []
    for _, item in data.iterrows():
        dataset.append(create_dataset_item(item["text"], item["labels"], model_name))
    return dataset

# parsing_data is a pandas DataFrame with "text" and "labels" columns
custom_dataset = create_custom_dataset(parsing_data)

model = AutoModelForSeq2SeqLM.from_pretrained('t5-small').to('cuda')
```
more …
```python
training_args = Seq2SeqTrainingArguments(
    output_dir="pdf-modeling-v1",
    num_train_epochs=1,
    per_device_train_batch_size=8,
    save_steps=10_000,
    save_total_limit=2,
    learning_rate=5e-5,
    use_cpu=False,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=None,  # use the default collate function
    train_dataset=custom_dataset,
)

# Train the model
trainer.train()
```
Please let me know if I am making any mistakes.
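In case it helps narrow things down, my current guess (just an assumption on my part) is that the `.to('cuda')` calls inside `create_dataset_item` are the culprit, since I believe the Trainer's default DataLoader pins each batch in CPU memory before transferring it to the GPU. Would returning plain CPU tensors and letting the Trainer handle device placement be the right fix? Something like:

```python
def create_dataset_item(text, labels, model_name='t5-small'):
    inputs = tokenizer("generate text: " + text, return_tensors="pt",
                       padding="max_length", truncation=True, max_length=1024)
    lb = tokenizer(labels, return_tensors="pt",
                   padding="max_length", truncation=True, max_length=128)
    # Keep everything on the CPU; the Trainer moves each batch to the GPU itself.
    return {
        'input_ids': inputs['input_ids'].squeeze(),
        'attention_mask': inputs['attention_mask'].squeeze(),
        'labels': lb['input_ids'].squeeze(),
    }
```

Or would it be better to just set `dataloader_pin_memory=False` in `Seq2SeqTrainingArguments`?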