When I use transformers.TrainingArguments with evaluation_strategy="steps", save_strategy="steps", and eval_steps=200, I get an error in the loss computation during evaluation.
my code:
trainer = ModifiedTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=1,
        # warmup_steps=100,
        num_train_epochs=4,
        learning_rate=2e-6,
        fp16=True,
        logging_steps=10,
        optim="adamw_torch",
        # evaluation_strategy="steps",
        save_strategy="steps",
        # eval_steps=200,
        save_steps=200,
        # load_best_model_at_end=True,
        output_dir='./fenlei',
        save_total_limit=2,
        # remove_unused_columns=True,
    ),
    callbacks=[TensorBoardCallback(writer)],
    data_collator=data_collator,
)
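For clarity, the failing run has the commented-out evaluation lines above enabled. A minimal sketch of just those arguments (everything else as above):

import transformers

# The error only appears once step-based evaluation is switched on:
args = transformers.TrainingArguments(
    output_dir='./fenlei',
    evaluation_strategy="steps",  # run evaluate() every eval_steps
    eval_steps=200,
    save_strategy="steps",
    save_steps=200,
)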
The traceback is:
❱  230     trainer.train()
   231
   232     writer.close()
   233     # save model

/home/hexinyu/miniconda3/envs/nlp/lib/python3.10/site-packages/transformers/trainer.py:1662 in train

  1659         inner_training_loop = find_executable_batch_size(
  1660             self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
  1661         )
❱ 1662         return inner_training_loop(
  1663             args=args,
  1664             resume_from_checkpoint=resume_from_checkpoint,
  1665             trial=trial,

/home/hexinyu/miniconda3/envs/nlp/lib/python3.10/site-packages/transformers/trainer.py:2006 in _inner_training_loop

  2003                     self.state.epoch = epoch + (step + 1 + steps_skipped) / steps_in_epo
  2004                     self.control = self.callback_handler.on_step_end(args, self.state, s
  2005
❱ 2006                     self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_k
  2007                 else:
  2008                     self.control = self.callback_handler.on_substep_end(args, self.state

/home/hexinyu/miniconda3/envs/nlp/lib/python3.10/site-packages/transformers/trainer.py:2287 in _maybe_log_save_evaluate

  2284                     )
  2285                     metrics.update(dataset_metrics)
  2286             else:
❱ 2287                 metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
  2288             self._report_to_hp_search(trial, self.state.global_step, metrics)
  2289
  2290         if self.control.should_save:

/home/hexinyu/miniconda3/envs/nlp/lib/python3.10/site-packages/transformers/trainer.py:2995 in evaluate

  2992         start_time = time.time()
  2993
  2994         eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else se
❱ 2995         output = eval_loop(
  2996             eval_dataloader,
  2997             description="Evaluation",
  2998             # No point gathering the predictions if there are no metrics, otherwise we d

/home/hexinyu/miniconda3/envs/nlp/lib/python3.10/site-packages/transformers/trainer.py:3176 in evaluation_loop

  3173                     batch_size = observed_batch_size
  3174
  3175             # Prediction step
❱ 3176             loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_o
  3177             inputs_decode = self._prepare_input(inputs["input_ids"]) if args.include_inp
  3178
  3179             if is_torch_tpu_available():

/home/hexinyu/miniconda3/envs/nlp/lib/python3.10/site-packages/transformers/trainer.py:3431 in prediction_step

  3428             else:
  3429                 if has_labels or loss_without_labels:
  3430                     with self.compute_loss_context_manager():
❱ 3431                         loss, outputs = self.compute_loss(model, inputs, return_outputs=
  3432                         # loss= self.compute_loss(model, inputs, return_outputs=True)
  3433
  3434                     loss = loss.mean().detach()

/home/hexinyu/miniconda3/envs/nlp/lib/python3.10/site-packages/torch/_tensor.py:930 in __iter__

   927         # NB: We have intentionally skipped __torch_function__ dispatch here.
   928         # See gh-54457
   929         if self.dim() == 0:
❱  930             raise TypeError("iteration over a 0-d tensor")
   931         if torch._C._get_tracing_state():
   932             warnings.warn(
   933                 "Iterating over a tensor might cause the trace to be incorrect. "
TypeError: iteration over a 0-d tensor
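The last two frames point at the likely cause: prediction_step calls self.compute_loss(model, inputs, return_outputs=True) and unpacks the result as loss, outputs. If ModifiedTrainer.compute_loss (not shown above) ignores return_outputs and always returns a bare scalar loss tensor, that unpacking tries to iterate a 0-d tensor, which is exactly this TypeError. A minimal sketch of an override that stays compatible with evaluation, assuming ModifiedTrainer subclasses Trainer and the batch provides input_ids and labels (both assumptions, since the class definition isn't shown):

from transformers import Trainer

class ModifiedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        # Hypothetical forward pass; adjust to the real model's inputs.
        outputs = model(
            input_ids=inputs["input_ids"],
            labels=inputs["labels"],
        )
        loss = outputs.loss
        # prediction_step unpacks (loss, outputs) when return_outputs=True,
        # so a bare scalar return value breaks evaluation with
        # "iteration over a 0-d tensor".
        return (loss, outputs) if return_outputs else loss

With return_outputs honored, the commented-out evaluation_strategy="steps" and eval_steps=200 lines should be safe to re-enable.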