Hello everyone,
I have been using the same code for fine-tuning CodeLLAMA2. The code was working great but suddenly the training process started crashing without an error message. I tried to run the code several times, but each time it crashed at a different step. I tried to update the transformers library and the problem persisted. Also, I monitored the GPUs memory usage and it was 5/24 and 7/24. I also noticed that the expected training time was reduced from 2hrs 30 min when the training process was working fine to less than one hour.
Here is the code I am using:
from datetime import datetime
import os
import sys
import faulthandler
import transformers
from datasets import load_dataset
from transformers.utils import logging
import datasets
import torch
import logging
from peft import (
LoraConfig,
get_peft_model,
get_peft_model_state_dict,
prepare_model_for_kbit_training,
set_peft_model_state_dict,
)
# we use this one for the run
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, \
BitsAndBytesConfig
# ===== Notes =========================
#
# =====================================
faulthandler.enable()
transformers.logging.set_verbosity_debug()
logger = transformers.logging.get_logger("transformers")
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
dataset = load_dataset('json', data_files=r"dataset.json", split="train")
train_dataset = dataset.train_test_split(test_size=0.1)["train"]
eval_dataset = dataset.train_test_split(test_size=0.1)["test"]
# print(eval_dataset[3])
# device_map = {
# "transformer.word_embeddings": 0,
# "transformer.word_embeddings_layernorm": 0,
# "lm_head": "gpu",
# "transformer.h": 0,
# "transformer.ln_f": 0,
# }
quantization_config = BitsAndBytesConfig(load_in_8bit_fp32_cpu_offload=True)
# Load the model
base_model = "codellama/CodeLlama-7b-hf"
output_dir = "XX"
model = AutoModelForCausalLM.from_pretrained(
base_model,
torch_dtype=torch.float16,
quantization_config=quantization_config,
device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
# Tokenization
# Setup some tokenization settings like left padding because it makes training use less memory:
tokenizer.add_eos_token = True
tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"
# Setup the tokenize function to make labels and input_ids the same. This is basically what
# self-supervised fine-tuning is:
def tokenize(prompt):
result = tokenizer(
prompt,
truncation=True,
max_length=512,
padding=False,
return_tensors=None,
)
# "self-supervised learning" means the labels are also the inputs:
result["labels"] = result["input_ids"].copy()
return result
def generate_and_tokenize_prompt(data_point):
full_prompt =f"""YXXX
### Input:
{data_point["question"]}
### Context:
###{data_point["context"]}
### Response:
{data_point["answer"]}
"""
return tokenize(full_prompt)
# Reformat to prompt and tokenize each sample:
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)
# setup LORA
model.train() # put model back into training mode
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
r=16, # Lora attention dimension
lora_alpha=16, # The alpha parameter for Lora scaling.
# target_modules: If this is not specified, modules will be chosen according to the model architecture.
# If the architecture is not
# known, an error will be raised — in this case, you should specify the target modules manually.
target_modules=[
"q_proj",
"k_proj",
"v_proj",
"o_proj",
],
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
wandb_project = "X"
if len(wandb_project) > 0:
os.environ["WANDB_PROJECT"] = wandb_project
if torch.cuda.device_count() > 1:
# keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
model.is_parallelizable = True
model.model_parallel = True
batch_size = 64
per_device_train_batch_size = 4
gradient_accumulation_steps = batch_size // per_device_train_batch_size
training_args = TrainingArguments(
per_device_train_batch_size=per_device_train_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
warmup_steps=30,
max_steps=300,
push_to_hub=True,
learning_rate=3e-4,
fp16=True,
logging_steps=10,
optim="adamw_torch",
eval_strategy="steps", # if val_set_size > 0 else "no",
save_strategy="steps",
eval_steps=20,
save_steps=20,
output_dir=output_dir,
# save_total_limit=3,
#load_best_model_at_end=True,
# ddp_find_unused_parameters=False if ddp else None,
group_by_length=True, # group sequences of roughly the same length together to speed up training
report_to="wandb", # if use_wandb else "none",
run_name=f"codellama-{datetime.now().strftime('%Y-%m-%d-%H-%M')}", # if use_wandb else None,
)
trainer = Trainer(
model=model,
train_dataset=tokenized_train_dataset,
eval_dataset=tokenized_val_dataset,
args=training_args,
data_collator=DataCollatorForSeq2Seq(
tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
),
)
model.config.use_cache = False
old_state_dict = model.state_dict
# model.state_dict = (lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())).__get__(
# model, type(model)
#)
if torch.__version__ >= "2" and sys.platform != "win32":
print("compiling the model")
model = torch.compile(model)
file_formatter = logging.Formatter(fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S", )
file_handler = logging.FileHandler(
os.path.join(training_args.output_dir, f"log.{os.getpid()}.{training_args.local_rank}.txt"))
file_handler.setFormatter(file_formatter)
logging.root.addHandler(file_handler)
trainer.train()
trainer.push_to_hub(model_name="X")
Here is the logs of the failed run:
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: answer, context, question. If answer, context, question are not expected by `PeftModelForCausalLM.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 49
Batch size = 8
Saving model checkpoint to network-code-llama\checkpoint-20
loading configuration file config.json from cache at C:\Users\ASUS\.cache\huggingface\hub\models--codellama--CodeLlama-7b-hf\snapshots\6c284d1468fe6c413cf56183e69b194dcfa27fe6\config.json
Model config LlamaConfig {
"_name_or_path": "codellama/CodeLlama-7b-hf",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 16384,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1000000,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.42.3",
"use_cache": true,
"vocab_size": 32016
}
C:\Users\ASUS\anaconda3\envs\Alaa_env_2\lib\site-packages\torch\utils\checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
warnings.warn(
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: answer, context, question. If answer, context, question are not expected by `PeftModelForCausalLM.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 49
Batch size = 8
Saving model checkpoint to network-code-llama\checkpoint-40
loading configuration file config.json from cache at C:\Users\ASUS\.cache\huggingface\hub\models--codellama--CodeLlama-7b-hf\snapshots\6c284d1468fe6c413cf56183e69b194dcfa27fe6\config.json
Model config LlamaConfig {
"_name_or_path": "codellama/CodeLlama-7b-hf",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 16384,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1000000,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.42.3",
"use_cache": true,
"vocab_size": 32016
}
C:\Users\ASUS\anaconda3\envs\Alaa_env_2\lib\site-packages\torch\utils\checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
warnings.warn(
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: answer, context, question. If answer, context, question are not expected by `PeftModelForCausalLM.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 49
Batch size = 8
Saving model checkpoint to network-code-llama\checkpoint-60
loading configuration file config.json from cache at C:\Users\ASUS\.cache\huggingface\hub\models--codellama--CodeLlama-7b-hf\snapshots\6c284d1468fe6c413cf56183e69b194dcfa27fe6\config.json
Model config LlamaConfig {
"_name_or_path": "codellama/CodeLlama-7b-hf",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 16384,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1000000,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.42.3",
"use_cache": true,
"vocab_size": 32016
}
C:\Users\ASUS\anaconda3\envs\Alaa_env_2\lib\site-packages\torch\utils\checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
warnings.warn(
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: answer, context, question. If answer, context, question are not expected by `PeftModelForCausalLM.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 49
Batch size = 8
Saving model checkpoint to network-code-llama\checkpoint-80
loading configuration file config.json from cache at C:\Users\ASUS\.cache\huggingface\hub\models--codellama--CodeLlama-7b-hf\snapshots\6c284d1468fe6c413cf56183e69b194dcfa27fe6\config.json
Model config LlamaConfig {
"_name_or_path": "codellama/CodeLlama-7b-hf",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 16384,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1000000,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.42.3",
"use_cache": true,
"vocab_size": 32016
}
C:\Users\ASUS\anaconda3\envs\Alaa_env_2\lib\site-packages\torch\utils\checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
warnings.warn(
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: answer, context, question. If answer, context, question are not expected by `PeftModelForCausalLM.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 49
Batch size = 8
Saving model checkpoint to network-code-llama\checkpoint-100
loading configuration file config.json from cache at C:\Users\ASUS\.cache\huggingface\hub\models--codellama--CodeLlama-7b-hf\snapshots\6c284d1468fe6c413cf56183e69b194dcfa27fe6\config.json
Model config LlamaConfig {
"_name_or_path": "codellama/CodeLlama-7b-hf",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 16384,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1000000,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.42.3",
"use_cache": true,
"vocab_size": 32016
}
C:\Users\ASUS\anaconda3\envs\Alaa_env_2\lib\site-packages\torch\utils\checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
warnings.warn(
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: answer, context, question. If answer, context, question are not expected by `PeftModelForCausalLM.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 49
Batch size = 8
Saving model checkpoint to network-code-llama\checkpoint-120
loading configuration file config.json from cache at C:\Users\ASUS\.cache\huggingface\hub\models--codellama--CodeLlama-7b-hf\snapshots\6c284d1468fe6c413cf56183e69b194dcfa27fe6\config.json
Model config LlamaConfig {
"_name_or_path": "codellama/CodeLlama-7b-hf",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 16384,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1000000,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.42.3",
"use_cache": true,
"vocab_size": 32016
}
C:\Users\ASUS\anaconda3\envs\Alaa_env_2\lib\site-packages\torch\utils\checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
warnings.warn(
Even though the model should be using these columns. I searched for similar problems but nothing worked for my case. I would appreciate any help.