Official document describes how to train with accelerate. But using accelerate with Trainer.train() aren’t explained.
Is is possible to do it?
2 Likes
try this:
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, AdamW
from accelerate import Accelerator
from datasets import load_dataset
import torch
# Initialize accelerator
accelerator = Accelerator()
# Load a dataset
dataset = load_dataset('text', data_files={'train': 'train.txt', 'test': 'test.txt'})
# Tokenization
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
def tokenize_function(examples):
# We are doing causal (unidirectional) masking
return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
dataset = dataset.map(tokenize_function, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask"])
# Split the dataset into train and test
train_dataset = dataset["train"]
test_dataset = dataset["test"]
# Initialize model
model = GPT2LMHeadModel.from_pretrained("gpt2")
optimizer = AdamW(model.parameters())
# Prepare everything with our `accelerator`.
model, optimizer, train_dataset, test_dataset = accelerator.prepare(model, optimizer, train_dataset, test_dataset)
# Now let's define our training loop
device = accelerator.device
model.train()
for epoch in range(3):
for step, batch in enumerate(train_dataset):
inputs = {k: v.to(device) for k, v in batch.items()}
outputs = model(**inputs)
loss = outputs.loss
accelerator.backward(loss)
optimizer.step()
optimizer.zero_grad()
# Evaluation logic
model.eval()
eval_loss = 0.0
eval_steps = 0
for batch in test_dataset:
with torch.no_grad():
inputs = {k: v.to(device) for k, v in batch.items()}
outputs = model(**inputs)
eval_loss += outputs.loss.item()
eval_steps += 1
eval_loss = eval_loss / eval_steps
print(f'Evaluation loss: {eval_loss}')
model.train()
let me know if it works.
Trainer uses accelerate now under the hood, so there is nothing needed to be done or added or changed
2 Likes