@sgugger Hi, I'm trying to fine-tune the "meta-llama/Llama-2-7b" model in a Kaggle notebook with 2x T4 GPUs, and I notice that only one GPU is being used.
Sun Feb 25 14:06:28 2024
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03 Driver Version: 535.129.03 CUDA Version: 12.2 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |
| N/A 76C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
| 1 Tesla T4 Off | 00000000:00:05.0 Off | 0 |
| N/A 77C P8 12W / 70W | 0MiB / 15360MiB | 0% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| No running processes found |
+---------------------------------------------------------------------------------------+
import pandas as pd
import torch
from datasets import Dataset, load_dataset
from random import randrange
from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, prepare_model_for_int8_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import warnings
warnings.filterwarnings("ignore")
# Load the training data from CSV and wrap it in a HF Dataset.
df = pd.read_csv("train.csv")
train = Dataset.from_pandas(df)

model_id = "meta-llama/Llama-2-7b"

# Llama has no pad token; reuse EOS. Right-padding is the recommended side
# for causal-LM SFT in fp16 (left-padding can cause overflow issues).
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Passing `load_in_4bit=True` as a bare kwarg is deprecated; use an explicit
# BitsAndBytesConfig (already imported above) and set the 4-bit compute
# dtype to match the fp16 training setup.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    # NOTE(review): device_map="auto" shards the model across the visible
    # GPUs (naive model parallelism); for data-parallel training launch the
    # script with torchrun/accelerate instead — confirm which is intended.
    device_map="auto",
)
model.resize_token_embeddings(len(tokenizer))

# The model is 4-bit quantized, so prepare it with the k-bit helper;
# `prepare_model_for_int8_training` is deprecated and targets int8 models.
from peft import prepare_model_for_kbit_training
model = prepare_model_for_kbit_training(model)
# LoRA adapter configuration for a causal LM: rank-64 adapters scaled by
# alpha=16, 10% dropout on the adapter inputs, and no bias parameters trained.
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model so only the LoRA weights are trainable.
model = get_peft_model(model, peft_config)
# Hyperparameters for the HF Trainer.
# NOTE: with save_strategy="no" no checkpoints are ever written, so the
# original save_total_limit=2 and load_best_model_at_end=False were inert
# and have been dropped (behavior is unchanged).
args = TrainingArguments(
    output_dir="custom_domain_test",
    num_train_epochs=5,
    per_device_train_batch_size=8,  # per GPU — effective batch = 8 * n_gpus
    optim="adamw_torch",
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,               # linear warmup over the first 10% of steps
    fp16=True,                      # mixed precision; matches the fp16 compute dtype
    logging_steps=100,
    save_strategy="no",
    seed=42,
    dataloader_num_workers=4,       # parallel CPU workers feeding the GPUs
    report_to="none",               # disable wandb/tensorboard reporting
)
# Build the supervised fine-tuning trainer: it reads raw strings from the
# "text" column of the dataset and, with packing enabled, concatenates
# multiple short examples into each fixed-length sequence.
trainer = SFTTrainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    train_dataset=train,
    # eval_dataset=test,
    peft_config=peft_config,
    dataset_text_field='text',
    # NOTE(review): 1042 looks like a typo for 1024 — confirm before changing.
    max_seq_length=1042,
    packing=True,
)

# Kick off training.
trainer.train()
Can you please tell me how to utilise both GPUs and increase GPU and CPU utilisation using the Hugging Face Trainer?