ValueError: Make sure that you pass in as many target sizes as the batch dimension of the logits

Hello, I have been getting this error:
"---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[209], line 1
----> 1 trainer.train()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2052, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
2050 hf_hub_utils.enable_progress_bars()
2051 else:
-> 2052 return inner_training_loop(
2053 args=args,
2054 resume_from_checkpoint=resume_from_checkpoint,
2055 trial=trial,
2056 ignore_keys_for_eval=ignore_keys_for_eval,
2057 )

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2467, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2464 self.state.epoch = epoch + (step + 1 + steps_skipped) / steps_in_epoch
2465 self.control = self.callback_handler.on_step_end(args, self.state, self.control)
-> 2467 self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
2468 else:
2469 self.control = self.callback_handler.on_substep_end(args, self.state, self.control)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2915, in Trainer._maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
2913 metrics = None
2914 if self.control.should_evaluate:
-> 2915 metrics = self._evaluate(trial, ignore_keys_for_eval)
2917 if self.control.should_save:
2918 self._save_checkpoint(model, trial, metrics=metrics)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2872, in Trainer._evaluate(self, trial, ignore_keys_for_eval, skip_scheduler)
2871 def _evaluate(self, trial, ignore_keys_for_eval, skip_scheduler=False):
-> 2872 metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
2873 self._report_to_hp_search(trial, self.state.global_step, metrics)
2875 # Run delayed LR scheduler now that metrics are populated

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:3868, in Trainer.evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
3865 start_time = time.time()
3867 eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
-> 3868 output = eval_loop(
3869 eval_dataloader,
3870 description="Evaluation",
3871 # No point gathering the predictions if there are no metrics, otherwise we defer to
3872 # self.args.prediction_loss_only
3873 prediction_loss_only=True if self.compute_metrics is None else None,
3874 ignore_keys=ignore_keys,
3875 metric_key_prefix=metric_key_prefix,
3876 )
3878 total_batch_size = self.args.eval_batch_size * self.args.world_size
3879 if f"{metric_key_prefix}_jit_compilation_time" in output.metrics:

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:4160, in Trainer.evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
4156 metrics = self.compute_metrics(
4157 EvalPrediction(predictions=all_preds, label_ids=all_labels, inputs=all_inputs)
4158 )
4159 else:
-> 4160 metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
4161 elif metrics is None:
4162 metrics = {}

File /opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
-> 116 return func(*args, **kwargs)

Cell In[191], line 59, in compute_metrics(evaluation_results, feature_extractor, threshold, id2label)
57 print(batch_boxes[50])
58 output = ModelOutput(logits=torch.tensor(batch_logits), pred_boxes=torch.tensor(batch_boxes))
---> 59 post_processed_output = feature_extractor.post_process_object_detection(
60 output, threshold=threshold, target_sizes=image_sizes
61 )
62 post_processed_predictions.extend(post_processed_output)
64 # Compute metrics

File /opt/conda/lib/python3.10/site-packages/transformers/models/detr/image_processing_detr.py:1796, in DetrImageProcessor.post_process_object_detection(self, outputs, threshold, target_sizes)
1794 if target_sizes is not None:
1795 if len(out_logits) != len(target_sizes):
-> 1796 raise ValueError(
1797 "Make sure that you pass in as many target sizes as the batch dimension of the logits"
1798 )
1800 prob = nn.functional.softmax(out_logits, -1)
1801 scores, labels = prob[..., :-1].max(-1)

ValueError: Make sure that you pass in as many target sizes as the batch dimension of the logits" …
This is my code for the metrics:
from dataclasses import dataclass

import numpy as np
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from transformers import EvalPrediction


@dataclass
class ModelOutput:
    logits: torch.Tensor
    pred_boxes: torch.Tensor


@torch.no_grad()
def compute_metrics(evaluation_results: EvalPrediction, feature_extractor, threshold=0.0, id2label=None):
    print(evaluation_results)
    predictions, targets = evaluation_results.predictions, evaluation_results.label_ids

    # For metric computation we need to provide:
    #  - targets as a list of dictionaries with keys "boxes", "labels"
    #  - predictions as a list of dictionaries with keys "boxes", "scores", "labels"

    image_sizes = []
    post_processed_targets = []
    post_processed_predictions = []

    # Collect targets in the required format for metric computation, converting
    # boxes to Pascal VOC format (x_min, y_min, x_max, y_max).
    # convert_bbox_yolo_to_pascal is the helper from the HF object-detection example (not shown here).
    for image_target in targets:
        batch_image_sizes = torch.tensor(np.array(image_target["orig_size"]))
        boxes = torch.tensor(image_target["boxes"])
        boxes = convert_bbox_yolo_to_pascal(boxes, image_target["orig_size"][:2])
        labels = torch.tensor(image_target["class_labels"])

        image_sizes.append(batch_image_sizes)
        post_processed_targets.append({"boxes": boxes, "labels": labels})

    # The model produces boxes in YOLO format; the image processor converts them
    # to Pascal VOC format during post-processing.
    batch_logits, batch_boxes = predictions[1], predictions[2]
    print(batch_boxes[50])
    output = ModelOutput(logits=torch.tensor(batch_logits), pred_boxes=torch.tensor(batch_boxes))
    post_processed_output = feature_extractor.post_process_object_detection(
        output, threshold=threshold, target_sizes=image_sizes
    )
    post_processed_predictions.extend(post_processed_output)

    # Compute metrics
    metric = MeanAveragePrecision(box_format="xyxy", class_metrics=True)
    metric.update(post_processed_predictions, post_processed_targets)
    metrics = metric.compute()

    # Replace the list of per-class metrics with a separate metric for each class
    classes = metrics.pop("classes")
    map_per_class = metrics.pop("map_per_class")
    mar_100_per_class = metrics.pop("mar_100_per_class")
    for class_id, class_map, class_mar in zip(classes, map_per_class, mar_100_per_class):
        class_name = id2label[class_id.item()] if id2label is not None else class_id.item()
        metrics[f"map_{class_name}"] = class_map
        metrics[f"mar_100_{class_name}"] = class_mar

    metrics = {k: round(v.item(), 4) for k, v in metrics.items()}

    return metrics
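
For reference, the check that raises this error in post_process_object_detection simply compares two lengths. A quick way to see the mismatch (a sketch, reusing the variables from compute_metrics above):

# Sketch: reproduce the comparison that post_process_object_detection performs.
# batch_logits and image_sizes are the variables from compute_metrics above.
print(len(batch_logits))   # batch dimension of the logits
print(len(image_sizes))    # number of target sizes passed in
# The ValueError is raised whenever these two numbers differ.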
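One fix is to restructure the body of compute_metrics so that image_sizes is collected once, in the same order as the predictions, and therefore matches the batch dimension that post_process_object_detection checks:
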
# Collect image sizes, ensuring the batch dimension is consistent
image_sizes = [torch.tensor(np.array(image_target["orig_size"])) for image_target in targets]
post_processed_targets = []
post_processed_predictions = []

# Process targets into the required format
for image_target in targets:
    boxes = torch.tensor(image_target["boxes"])
    boxes = convert_bbox_yolo_to_pascal(boxes, image_target["orig_size"][:2])  # Convert to Pascal VOC format
    labels = torch.tensor(image_target["class_labels"])
    
    post_processed_targets.append({"boxes": boxes, "labels": labels})

# Process predictions from model output
batch_logits, batch_boxes = predictions[1], predictions[2]
output = ModelOutput(logits=torch.tensor(batch_logits), pred_boxes=torch.tensor(batch_boxes))

# Ensure target_sizes aligns with the batch dimension of predictions
post_processed_output = feature_extractor.post_process_object_detection(
    output, threshold=threshold, target_sizes=image_sizes
)
post_processed_predictions.extend(post_processed_output)

# Compute metrics
metric = MeanAveragePrecision(box_format="xyxy", class_metrics=True)
metric.update(post_processed_predictions, post_processed_targets)
metrics = metric.compute()

# Replace list of per-class metrics with separate metric for each class
classes = metrics.pop("classes")
map_per_class = metrics.pop("map_per_class")
mar_100_per_class = metrics.pop("mar_100_per_class")
for class_id, class_map, class_mar in zip(classes, map_per_class, mar_100_per_class):
    class_name = id2label[class_id.item()] if id2label is not None else class_id.item()
    metrics[f"map_{class_name}"] = class_map
    metrics[f"mar_100_{class_name}"] = class_mar

metrics = {k: round(v.item(), 4) for k, v in metrics.items()}

return metrics
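
Since compute_metrics takes extra arguments beyond the EvalPrediction, it has to be bound before being handed to the Trainer. A minimal sketch using functools.partial, assuming feature_extractor and id2label are defined as above; the remaining Trainer arguments are placeholders for your own setup:

from functools import partial

from transformers import Trainer

# Bind the extra arguments so the Trainer can call the function
# with just the EvalPrediction.
eval_compute_metrics_fn = partial(
    compute_metrics, feature_extractor=feature_extractor, threshold=0.0, id2label=id2label
)

trainer = Trainer(
    model=model,                    # placeholder: your DETR model
    args=training_args,             # placeholder: your TrainingArguments
    train_dataset=train_dataset,    # placeholder
    eval_dataset=eval_dataset,      # placeholder
    compute_metrics=eval_compute_metrics_fn,
)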

Thank you!

Just realized I had left out eval_do_concat_batches=False in the training arguments. With the default (True), the evaluation loop concatenates outputs across batches, so compute_metrics no longer receives the per-batch lists this code expects.
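
For anyone else hitting this, a minimal sketch of the relevant argument (everything except eval_do_concat_batches is a placeholder for your own setup):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="detr-finetuned",      # placeholder
    per_device_eval_batch_size=8,     # placeholder
    eval_do_concat_batches=False,     # keep eval predictions/labels grouped per batch
)

With this set, the Trainer hands compute_metrics the predictions and labels grouped per batch instead of one concatenated array.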
