Hello there,
I just tried to summarize a text from my dataset using a custom fine-tuned BART model. To this end I followed the pytorch example listed here Summary of the tasks
This is the code that was used. And further below, the error is shown. Any ideas what could be the problem here?
from transformers import BartForConditionalGeneration, BartTokenizerFast
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
from datasets import load_dataset
raw_dataset = load_dataset(path='parquet', data_files={
'train': ['2021_1.parquet', '2021_2.parquet', '2021_3.parquet'],
'test': ['2021_4.parquet'
]})
model = BartForConditionalGeneration.from_pretrained("bart_nl_sum_17-04_15-50-23/checkpoints/checkpoint-10000")
tokenizer = BartTokenizerFast.from_pretrained("bart_nl_tiny_tz")
model_inputs = tokenizer(raw_dataset['train'][2]['description'], max_length=1024, return_tensors='pt', truncation=True) # padding=max_length
model_outputs = model.generate(
inputs=model_inputs["input_ids"],
max_length=150,
min_length=40,
length_penalty=2.0,
num_beams=4,
early_stopping=True
)
print(tokenizer.decode(model_outputs[0]))
Traceback (most recent call last):
File "legalsum_inf.py", line 22, in <module>
early_stopping=True
File "/opt/conda/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/transformers/generation_utils.py", line 1325, in generate
**model_kwargs,
File "/opt/conda/lib/python3.7/site-packages/transformers/generation_utils.py", line 2162, in beam_search
output_hidden_states=output_hidden_states,
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/transformers/models/bart/modeling_bart.py", line 1363, in forward
return_dict=return_dict,
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/transformers/models/bart/modeling_bart.py", line 1224, in forward
return_dict=return_dict,
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/transformers/models/bart/modeling_bart.py", line 793, in forward
raise ValueError("You have to specify either input_ids or inputs_embeds")
ValueError: You have to specify either input_ids or inputs_embeds