The code is below. It runs fine on 1 GPU but fails on 2 or more GPUs.
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorWithPadding, TrainingArguments, Trainer
from peft import get_peft_model, PromptTuningConfig, PromptTuningInit, TaskType
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("dolly-v2-3b")
model = AutoModelForCausalLM.from_pretrained(
    "dolly-v2-3b",
    load_in_8bit=True,
    device_map="auto",  # shards the model across all visible GPUs
)
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=50,
    prompt_tuning_init_text="Answer the question as truthfully as possible",
    tokenizer_name_or_path="dolly-v2-3b",
)
model = get_peft_model(model, peft_config)
model.to(device)  # with load_in_8bit + device_map="auto" the weights are already placed, so this is likely a no-op
train_data = [
    {
        "context": "How to Link Credit Card to ICICI Bank Account Step 1: ",
        "question": "How to add card?",
        "answer": "Relevant. To add your card you can follow these steps: Step 1: ",
    },
    {
        "context": "The python programming language is ",
        "question": "What is Python used for?",
        "answer": "Relevant. Python is used in many different fields in",
    },
]
def preprocess_function(examples):
    # Tokenize context and question together as the model input.
    tokenized_examples = tokenizer(
        examples["context"],
        examples["question"],
        truncation=True,
        max_length=2048,
        padding="max_length",
    )
    # Tokenize the answer separately and use its input_ids as the labels.
    tokenized_examples["labels"] = tokenizer(
        examples["answer"],
        truncation=True,
        max_length=2048,
        padding="max_length",
        return_tensors="pt",
    )["input_ids"][0]
    return tokenized_examples
tokenized_train_data = [preprocess_function(example) for example in train_data]
class DemoDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        # as_tensor avoids re-copying values that are already tensors (the labels)
        return {k: torch.as_tensor(v) for k, v in sample.items()}
dataset = DemoDataset(tokenized_train_data)
training_args = TrainingArguments(
    output_dir="results2",
    learning_rate=1e-5,
    per_device_train_batch_size=2,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_steps=1000,
    save_strategy="epoch",
    logging_dir="logs2",
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    # data_collator=data_collator,
    tokenizer=tokenizer,
)
trainer.train()
ERROR
RuntimeError: Caught RuntimeError in replica 1 on device 1.
Original Traceback (most recent call last):
  File "python3.8/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "python3.8/site-packages/peft/peft_model.py", line 723, in forward
    inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!
(when checking argument for argument tensors in method wrapper_CUDA_cat)
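For context, the traceback shows the Trainer has wrapped the model in torch.nn.DataParallel (parallel_apply.py): in replica 1 the learned prompt embeddings still sit on cuda:0 while inputs_embeds is on cuda:1, so the torch.cat inside peft_model.py fails. The failure mode can be reproduced in isolation:

import torch

a = torch.zeros(1, 2, device="cuda:0")
b = torch.zeros(1, 2, device="cuda:1")
torch.cat((a, b), dim=1)  # RuntimeError: Expected all tensors to be on the same device ...

One mitigation I have seen suggested (a sketch, not verified on this exact setup; the LOCAL_RANK variable and a one-process-per-GPU launch via torchrun are assumptions) is to pin the whole model to a single device per process instead of sharding it with device_map="auto", so every tensor in the forward pass lives on one GPU:

import os

# Assumes one process per GPU (e.g. launched with torchrun); LOCAL_RANK is
# set by the launcher. The "" key maps the root module, pinning the entire
# model to this process's GPU so prompts and inputs_embeds share a device.
local_rank = int(os.environ.get("LOCAL_RANK", 0))
model = AutoModelForCausalLM.from_pretrained(
    "dolly-v2-3b",
    load_in_8bit=True,
    device_map={"": local_rank},
)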