I am trying to fine-tune the Llama-2 chat model for sequence classification using PEFT (LoRA) on top of 4-bit quantization:
import torch
from datasets import load_dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          BitsAndBytesConfig, DataCollatorWithPadding,
                          Trainer, TrainingArguments)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

model_id = "meta-llama/Llama-2-7b-chat-hf"
bnb_config = BitsAndBytesConfig(   # 4-bit NF4 quantization, bfloat16 compute
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=API)
model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=3, quantization_config=bnb_config,
    device_map="auto", token=API)
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    r=2,
    lora_alpha=2,
    target_modules=["q_proj", "up_proj", "o_proj", "k_proj",
                    "down_proj", "gate_proj", "v_proj"],
    lora_dropout=0.15,
    bias="none",
)
model = get_peft_model(model, config)
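# (Not in my original script; added here as a sanity check. I'm assuming PEFT's
# print_trainable_parameters() helper, which should roughly match the
# "Number of trainable parameters = 5,009,408" line in the Trainer log below.)
model.print_trainable_parameters()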
data = load_dataset("FinanceInc/auditor_sentiment")
tokenizer.pad_token = tokenizer.eos_token
data = data.map(
    lambda samples: tokenizer(samples["sentence"], return_tensors='pt', padding=True),
    batched=True)
data = data.rename_column('label', 'labels')
data
The data:
DatasetDict({
    train: Dataset({
        features: ['sentence', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 3877
    })
    test: Dataset({
        features: ['sentence', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 969
    })
})
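As a quick sanity check (this snippet is illustrative, not copied verbatim from my notebook), a single training row contains all the columns I'd expect the Trainer to use, and the labels should be compatible with num_labels=3:

example = data['train'][0]
print(example.keys())                        # sentence, labels, input_ids, attention_mask
print(len(example['input_ids']))             # tokenized length of the first sentence
print(sorted(set(data['train']['labels'])))  # should be [0, 1, 2] for three classes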
Now, the model clearly accepts 'input_ids' and 'attention_mask', because the code below produces a perfectly valid output:
input_ids = torch.tensor(data['train'][0]['input_ids'])
input_ids = torch.unsqueeze(input_ids, 0)
attention_mask = torch.tensor(data['train'][0]['attention_mask'])
attention_mask = torch.unsqueeze(attention_mask, 0)
output = model(input_ids=input_ids, attention_mask=attention_mask)
output
Output:
SequenceClassifierOutputWithPast(loss={'logits': tensor([[-1.7373, 0.5537, 0.7510]], grad_fn=<...>)}, logits=tensor([[-1.7373, 0.5537, 0.7510]], grad_fn=<...>), past_key_values=None, hidden_states=None, attentions=None)
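As I understand it, the Trainer decides which dataset columns to keep by inspecting the model's forward signature, so I also tried to look at that directly (a rough sketch; I'm assuming the PEFT wrapper exposes the underlying classifier at base_model.model, which may not be the right attribute path):

import inspect

print(inspect.signature(model.forward))                   # the PeftModel wrapper's forward
print(inspect.signature(model.base_model.model.forward))  # the wrapped sequence-classification forward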
So I try to use the Trainer class:
trainer = Trainer(
    model=model,
    train_dataset=data['train'],
    eval_dataset=data['test'],
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
    ),
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
trainer.train()
But I keep getting errors:
Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The model is quantized. To train this model you need to add additional modules inside the model such as adapters using `peft` library and freeze the model weights. Please check the examples in https://github.com/huggingface/peft for more details.
max_steps is given, it will override any value given in num_train_epochs
The following columns in the training set don't have a corresponding argument in `PeftModel.forward` and have been ignored: sentence, labels, attention_mask, input_ids. If sentence, labels, attention_mask, input_ids are not expected by `PeftModel.forward`, you can safely ignore this message.
***** Running training *****
Num examples = 0
Num Epochs = 1
Instantaneous batch size per device = 1
Total train batch size (w. parallel, distributed & accumulation) = 4
Gradient Accumulation steps = 4
Total optimization steps = 10
Number of trainable parameters = 5,009,408
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-59-d57a6efd24d9> in <cell line: 19>()
17 )
18 model.config.use_cache = False # silence the warnings. Please re-enable for inference!
---> 19 trainer.train()
11 frames
/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1537 hf_hub_utils.enable_progress_bars()
1538 else:
-> 1539 return inner_training_loop(
1540 args=args,
1541 resume_from_checkpoint=resume_from_checkpoint,
/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1797
1798 step = -1
-> 1799 for step, inputs in enumerate(epoch_iterator):
1800 total_batched_samples += 1
1801 if rng_to_sync:
/usr/local/lib/python3.10/dist-packages/accelerate/data_loader.py in __iter__(self)
382 # We iterate one batch ahead to check when we are at the end
383 try:
--> 384 current_batch = next(dataloader_iter)
385 except StopIteration:
386 yield
/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py in __next__(self)
631 # TODO(https://github.com/pytorch/pytorch/issues/76750)
632 self._reset() # type: ignore[call-arg]
--> 633 data = self._next_data()
634 self._num_yielded += 1
635 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
675 def _next_data(self):
676 index = self._next_index() # may raise StopIteration
--> 677 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
678 if self._pin_memory:
679 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
47 if self.auto_collation:
48 if hasattr(self.dataset, "__getitems__") and self.dataset.__getitems__:
---> 49 data = self.dataset.__getitems__(possibly_batched_index)
50 else:
51 data = [self.dataset[idx] for idx in possibly_batched_index]
/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py in __getitems__(self, keys)
2805 def __getitems__(self, keys: List) -> List:
2806 """Can be used to get a batch using a list of integers indices."""
-> 2807 batch = self.__getitem__(keys)
2808 n_examples = len(batch[next(iter(batch))])
2809 return [{col: array[i] for col, array in batch.items()} for i in range(n_examples)]
/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py in __getitem__(self, key)
2801 def __getitem__(self, key): # noqa: F811
2802 """Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools)."""
-> 2803 return self._getitem(key)
2804
2805 def __getitems__(self, keys: List) -> List:
/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py in _getitem(self, key, **kwargs)
2785 format_kwargs = format_kwargs if format_kwargs is not None else {}
2786 formatter = get_formatter(format_type, features=self._info.features, **format_kwargs)
-> 2787 pa_subtable = query_table(self._data, key, indices=self._indices if self._indices is not None else None)
2788 formatted_output = format_table(
2789 pa_subtable, key, formatter=formatter, format_columns=format_columns, output_all_columns=output_all_columns
/usr/local/lib/python3.10/dist-packages/datasets/formatting/formatting.py in query_table(table, key, indices)
581 else:
582 size = indices.num_rows if indices is not None else table.num_rows
--> 583 _check_valid_index_key(key, size)
584 # Query the main table
585 if indices is None:
/usr/local/lib/python3.10/dist-packages/datasets/formatting/formatting.py in _check_valid_index_key(key, size)
534 elif isinstance(key, Iterable):
535 if len(key) > 0:
--> 536 _check_valid_index_key(int(max(key)), size=size)
537 _check_valid_index_key(int(min(key)), size=size)
538 else:
/usr/local/lib/python3.10/dist-packages/datasets/formatting/formatting.py in _check_valid_index_key(key, size)
524 if isinstance(key, int):
525 if (key < 0 and key + size < 0) or (key >= size):
--> 526 raise IndexError(f"Invalid key: {key} is out of bounds for size {size}")
527 return
528 elif isinstance(key, slice):
IndexError: Invalid key: 2012 is out of bounds for size 0
How do I fix this? And how do I find out which arguments the model's forward method actually expects, so I know which dataset columns the Trainer will keep?
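Based on the warning that sentence, labels, attention_mask and input_ids were all ignored, together with "Num examples = 0", my guess (unconfirmed) is that the Trainer drops every column it cannot match against the wrapped model's forward signature, leaving an empty dataset. If that is the case, is something like the following the right workaround (sketch only; I'm not sure disabling remove_unused_columns is the intended fix here)?

args = TrainingArguments(
    # ... same arguments as above ...
    output_dir="outputs",
    optim="paged_adamw_8bit",
    remove_unused_columns=False,  # stop the Trainer from dropping input_ids / attention_mask / labels
)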