I am currently trying to train a Mistral model for music generation. When using the Trainer, I get a strange permission error. I have checked the permissions on the output directory and its files, and they definitely allow read/write access. My IDE is running in admin mode. I can't figure out why it keeps throwing this error.
# Build the Trainer from the model, training arguments, collator and
# train/validation datasets prepared earlier in the notebook.
remi_trainer = Trainer(
    model=remi_model,
    args=training_config,
    data_collator=remi_collator,
    train_dataset=remi_dataset_train,
    eval_dataset=remi_dataset_valid,
    compute_metrics=compute_metrics,
    callbacks=None,
    preprocess_logits_for_metrics=preprocess_logits,
)

# Run training. Periodic checkpoint saves happen inside train().
# NOTE(review): on Windows with transformers 4.37.0 the checkpoint save can
# raise PermissionError when the library calls os.open() on the checkpoint
# directory; this appears to be fixed in 4.37.1 -- confirm against the
# release notes before relying on it.
print("Training commencing....")
train_result = remi_trainer.train()
print("Training complete.")

# Persist the final model.
# NOTE(review): no tokenizer is passed to Trainer above, so presumably only
# the model is written here -- verify if the tokenizer must be saved as well.
remi_trainer.save_model()

# Report the training metrics, write them to disk, then save trainer state.
train_metrics = train_result.metrics
remi_trainer.log_metrics("train", train_metrics)
remi_trainer.save_metrics("train", train_metrics)
remi_trainer.save_state()
PermissionError Traceback (most recent call last)
Cell In[34], line 16
13 # Training
14 #remi_trainer.
15 print("Training commencing....")
---> 16 train_result = remi_trainer.train()
17 print("Training complete.")
18 remi_trainer.save_model() # Saves the tokenizer too
File ~\anaconda3\envs\GPU_Env\Lib\site-packages\transformers\trainer.py:1539, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1537 hf_hub_utils.enable_progress_bars()
1538 else:
-> 1539 return inner_training_loop(
1540 args=args,
1541 resume_from_checkpoint=resume_from_checkpoint,
1542 trial=trial,
1543 ignore_keys_for_eval=ignore_keys_for_eval,
1544 )
File ~\anaconda3\envs\GPU_Env\Lib\site-packages\transformers\trainer.py:1929, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1926 self.state.epoch = epoch + (step + 1 + steps_skipped) / steps_in_epoch
1927 self.control = self.callback_handler.on_step_end(args, self.state, self.control)
-> 1929 self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
1930 else:
1931 self.control = self.callback_handler.on_substep_end(args, self.state, self.control)
File ~\anaconda3\envs\GPU_Env\Lib\site-packages\transformers\trainer.py:2300, in Trainer._maybe_log_save_evaluate(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)
2297 self.lr_scheduler.step(metrics[metric_to_check])
2299 if self.control.should_save:
-> 2300 self._save_checkpoint(model, trial, metrics=metrics)
2301 self.control = self.callback_handler.on_save(self.args, self.state, self.control)
File ~\anaconda3\envs\GPU_Env\Lib\site-packages\transformers\trainer.py:2418, in Trainer._save_checkpoint(self, model, trial, metrics)
2415 os.rename(staging_output_dir, output_dir)
2417 # Ensure rename completed in cases where os.rename is not atomic
-> 2418 fd = os.open(output_dir, os.O_RDONLY)
2419 os.fsync(fd)
2420 os.close(fd)
PermissionError: [Errno 13] Permission denied: 'G:\\FYP\\Mistral\\Model_Predictions\\Version_1\\cps\\checkpoint-10'
I am using transformers version 4.37.0.