Trainer.train() with accelerate

The official documentation describes how to train with accelerate, but using accelerate with Trainer.train() isn't explained.
Is it possible to do it?

1 Like

Try this:

from transformers import GPT2LMHeadModel, GPT2TokenizerFast, AdamW
from accelerate import Accelerator
from datasets import load_dataset
import torch

# Initialize accelerator
accelerator = Accelerator()

# Load a dataset: one example per line of each plain-text file
dataset = load_dataset('text', data_files={'train': 'train.txt', 'test': 'test.txt'})

# Tokenization
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
# GPT-2 ships without a padding token, so padding="max_length" would raise.
# Reuse the end-of-text token for padding; padded positions are still
# identified by the attention mask the tokenizer returns.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize the ``"text"`` entries of ``examples`` with the module-level tokenizer.

    Each text is truncated or padded to ``max_length=512``. The tokenizer's
    output is returned unchanged.
    """
    texts = examples["text"]
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(texts, **encoding_options)

# Tokenize every split; batched=True passes lists of rows to tokenize_function.
dataset = dataset.map(tokenize_function, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask"])

# Split the dataset into train and test
train_dataset = dataset["train"]
test_dataset = dataset["test"]

# accelerator.prepare() works on DataLoaders (it handles sharding and device
# placement of batches), so wrap the datasets before preparing them.
BATCH_SIZE = 8
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Initialize model
model = GPT2LMHeadModel.from_pretrained("gpt2")
# Use PyTorch's AdamW: the transformers implementation is deprecated.
# 5e-5 is a conventional fine-tuning learning rate; tune it for your data.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# Prepare everything with our `accelerator`.
model, optimizer, train_dataloader, test_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader, test_dataloader
)


def add_causal_lm_labels(batch):
    """Return a copy of ``batch`` with a ``labels`` entry for causal LM training.

    The labels are the input ids themselves (the model shifts them
    internally). Padded positions, where ``attention_mask`` is 0, are set to
    -100 so the loss ignores them.
    """
    labels = batch["input_ids"].clone()
    labels[batch["attention_mask"] == 0] = -100
    return {**batch, "labels": labels}


# Now let's define our training loop. Batches coming out of the prepared
# dataloaders are already on the right device, so no manual .to(device) calls.
NUM_EPOCHS = 3

for epoch in range(NUM_EPOCHS):
    model.train()
    for batch in train_dataloader:
        outputs = model(**add_causal_lm_labels(batch))
        loss = outputs.loss
        # accelerator.backward() replaces loss.backward() so that mixed
        # precision and distributed setups are handled correctly.
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()

    # Evaluation logic
    model.eval()
    eval_loss = 0.0
    eval_steps = 0

    for batch in test_dataloader:
        with torch.no_grad():
            outputs = model(**add_causal_lm_labels(batch))
        eval_loss += outputs.loss.item()
        eval_steps += 1

    # Mean over the batches seen by this process; NaN if the test split is empty.
    eval_loss = eval_loss / eval_steps if eval_steps else float("nan")
    print(f'Evaluation loss: {eval_loss}')


Let me know if it works.

Trainer now uses accelerate under the hood, so nothing needs to be added or changed.