ValueError: too many values to unpack (expected 2) in text summarization. Possibly due to nested lists?

Hi, I am getting this error when executing Trainer.train()

All of my code works successfully when I do not use return_tensors="pt" and .to(device) during tokenization.
I wanted to utilize my GPU, which is why I changed my code. However, I now end up with the error ValueError: too many values to unpack (expected 2).
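To illustrate the difference I mean, here is a minimal comparison on a made-up sentence (not my actual data):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/pegasus-xsum")
sample = "This is a made-up example sentence."

# Without return_tensors, the tokenizer returns plain Python lists
plain = tokenizer(sample, max_length=512, truncation=True, padding="max_length")
print(type(plain["input_ids"]), len(plain["input_ids"]))    # <class 'list'> 512

# With return_tensors="pt", it returns a 2D tensor with a batch dimension of 1
tensors = tokenizer(sample, max_length=512, truncation=True, padding="max_length", return_tensors="pt")
print(tensors["input_ids"].shape)                           # torch.Size([1, 512])

My full code follows: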

path = "/content/drive/MyDrive/pickle_files/pfizer_data.pkl"

df = pd.read_pickle(path) 
pfizer = Dataset.from_pandas(df)

test_size=0.33

pfizer = pfizer.shuffle().train_test_split(test_size=test_size)
pfizer

DatasetDict({
    train: Dataset({
        features: ['text', 'summary'],
        num_rows: 40
    })
    test: Dataset({
        features: ['text', 'summary'],
        num_rows: 20
    })
})

import torch
from transformers import AutoTokenizer

# use the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("google/pegasus-xsum")
def preprocess_function(examples):
    model_inputs = tokenizer(examples["text"], max_length=512, truncation=True, padding="max_length", return_tensors="pt").to(device)
    # Set up the tokenizer for targets
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["summary"], max_length=512, truncation=True, padding="max_length", return_tensors="pt").to(device)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = pfizer.map(preprocess_function)
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

model = AutoModelForSeq2SeqLM.from_pretrained("google/pegasus-xsum")
model = model.to(device)

training_args = Seq2SeqTrainingArguments(output_dir="test_trainer")

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"]
)

Then when I run trainer.train() I get the error ValueError: too many values to unpack (expected 2).

I have compared the dataset at each step with the dataset produced without return_tensors="pt" and .to(device), and the only difference is the amount of nesting in the lists.

For example, in my successful run the dataset structure is as follows:

tokenized_datasets["train"]["input_ids"]

[[2920,
8478,
30156,

5049,
1096,
18149,
112,
1008,
1],
[2920,
8478,
30156,
8233,
8478,
etc

Whereas when I get the error it is as follows:

tokenized_datasets["train"]["input_ids"]

[[[2920,
8478,
30156,

5049,
1096,
18149,
112,
1008,
1]],
[[2920,
8478,
30156,
8233,
8478,
etc

So I am thinking the error might be due to the nested lists. If so, what is causing them and how can I get rid of them?
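My guess (not confirmed) is that the nesting would also explain the exact failure at the bottom of the stack trace below: _expand_mask does bsz, src_len = mask.size(), which only works for a 2D mask. A small sketch of what I mean, with made-up shapes:

import torch

# A mask shaped [batch_size, seq_len] unpacks into exactly two values
good_mask = torch.ones(8, 512)
bsz, src_len = good_mask.size()    # fine

# With the extra nesting, each example carries a spurious dimension of 1,
# so the collated mask becomes [batch_size, 1, seq_len] and unpacking fails
bad_mask = torch.ones(8, 1, 512)
bsz, src_len = bad_mask.size()     # ValueError: too many values to unpack (expected 2)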

I have tried things like

flat_list = [item for sublist in tokenized_datasets["train"]["input_ids"] for item in sublist]
tokenized_datasets["train"]["input_ids"] = flat_list

But that does not work; I get a TypeError: 'Dataset' object does not support item assignment.
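From what I understand, a Dataset column cannot be assigned to in place, so I assume the flattening would have to go back through map, something like the sketch below (the helper name is made up, it assumes the columns are input_ids, attention_mask and labels, and I have not verified that it fixes the training error):

def drop_batch_dim(example):
    # remove the extra [1, ...] nesting that return_tensors="pt" produced
    return {
        "input_ids": example["input_ids"][0],
        "attention_mask": example["attention_mask"][0],
        "labels": example["labels"][0],
    }

tokenized_datasets = tokenized_datasets.map(drop_batch_dim)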

Thank you

My whole error stack is as follows:

----> 1 trainer.train()

9 frames
/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1420 tr_loss_step = self.training_step(model, inputs)
1421 else:
-> 1422 tr_loss_step = self.training_step(model, inputs)
1423
1424 if (

/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in training_step(self, model, inputs)
2009
2010 with self.autocast_smart_context_manager():
-> 2011 loss = self.compute_loss(model, inputs)
2012
2013 if self.args.n_gpu > 1:

/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2041 else:
2042 labels = None
-> 2043 outputs = model(**inputs)
2044 # Save past state if it exists
2045 # TODO: this needs to be fixed and made cleaner later.

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/transformers/models/pegasus/modeling_pegasus.py in forward(self, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, head_mask, decoder_head_mask, cross_attn_head_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1405 output_attentions=output_attentions,
1406 output_hidden_states=output_hidden_states,
-> 1407 return_dict=return_dict,
1408 )
1409 lm_logits = self.lm_head(outputs[0]) + self.final_logits_bias

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/transformers/models/pegasus/modeling_pegasus.py in forward(self, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, head_mask, decoder_head_mask, cross_attn_head_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)
1236 output_attentions=output_attentions,
1237 output_hidden_states=output_hidden_states,
-> 1238 return_dict=return_dict,
1239 )
1240 # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/transformers/models/pegasus/modeling_pegasus.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)
767 if attention_mask is not None:
768 # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
-> 769 attention_mask = _expand_mask(attention_mask, inputs_embeds.dtype)
770
771 encoder_states = () if output_hidden_states else None

/usr/local/lib/python3.7/dist-packages/transformers/models/pegasus/modeling_pegasus.py in _expand_mask(mask, dtype, tgt_len)
96 Expands attention_mask from [bsz, seq_len] to [bsz, 1, tgt_seq_len, src_seq_len].
97 """
---> 98 bsz, src_len = mask.size()
99 tgt_len = tgt_len if tgt_len is not None else src_len
100

ValueError: too many values to unpack (expected 2)

Hi, did you find a solution to this? I am facing a similar issue with Flan-T5.