I ran the code below and hit a recursion error:
import torch
from transformers import GPT2Tokenizer

def sd_data_collator(dataset_samples_list):
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2', padding_side='right')
    tokenizer.pad_token = tokenizer.eos_token

    encoded_results = tokenizer(dataset_samples_list, padding=True, truncation=True,
                                return_tensors='pt', return_attention_mask=True)

    # Build the dict of keyword arguments that Trainer passes to model.forward().
    # (return_tensors='pt' already yields stacked tensors, so the torch.stack
    # calls below are redundant but harmless.)
    batch = {}
    batch['input_ids'] = torch.stack([result for result in encoded_results['input_ids']])
    batch['past'] = None
    batch['attention_mask'] = torch.stack([result for result in encoded_results['attention_mask']])
    batch['position_ids'] = None
    batch['head_mask'] = None
    batch['inputs_embeds'] = None
    batch['labels'] = None
    batch['use_cache'] = True
    return batch
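Calling the collator directly on a couple of made-up strings behaves as I'd expect, so I don't think the collator itself is the problem:

# Quick sanity check of the collator with made-up sample strings
batch = sd_data_collator(['first sample abstract', 'a second, somewhat longer sample abstract'])
print(batch['input_ids'].shape)       # (2, longest_sequence_in_batch)
print(batch['attention_mask'].shape)  # same shape as input_ids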
from transformers import Trainer, TrainingArguments

sd_dataset = SDAbstractsDataset('/path/to/sd_samples_64.csv')

training_args = TrainingArguments(
    output_dir='/path/to/finetuned_gpt2',
    do_train=True,
    per_device_train_batch_size=4,
    learning_rate=1e-3,
    num_train_epochs=1
)

model = GPT2FinetunedWithNgrams.from_pretrained('gpt2')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=sd_dataset,
    data_collator=sd_data_collator
)

trainer.train()
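For context, my understanding is that each training step effectively reduces to something like the following (a simplified sketch, not the actual transformers source; the sample strings are made up), which is how the collator's dict ends up as the keyword arguments of forward():

# Rough sketch of what one Trainer step boils down to
inputs = sd_data_collator(['sample abstract one', 'sample abstract two'])
outputs = model(**inputs)  # nn.Module.__call__ dispatches to forward() below
loss = outputs[0]          # Trainer takes the loss from the first output,
                           # but the forward() below never returns one
loss.backward()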
And here's the model class:

from transformers import GPT2LMHeadModel, GPT2Tokenizer

class GPT2FinetunedWithNgrams(GPT2LMHeadModel):
    def __init__(self, config, model_tokenizer=None):
        super().__init__(config)
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2', padding_side='right')
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def load_ngrams_model(self, ngrams_model_path):
        self.ngrams_model = NGrams(ngrams_model_path)

    def forward(
        self,
        input_ids=None,
        past=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        use_cache=True,
    ):
        # generate() internally calls the model, which lands back in this
        # forward(), which calls generate() again.
        output = self.generate(input_ids=input_ids, max_length=470)
        # (forward currently ends here; nothing is returned)
Here’s the whole error (it’s really lengthy):
Some weights of GPT2FinetunedWithNgrams were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.0.attn.masked_bias', 'h.1.attn.masked_bias', 'h.2.attn.masked_bias', 'h.3.attn.masked_bias', 'h.4.attn.masked_bias', 'h.5.attn.masked_bias', 'h.6.attn.masked_bias', 'h.7.attn.masked_bias', 'h.8.attn.masked_bias', 'h.9.attn.masked_bias', 'h.10.attn.masked_bias', 'h.11.attn.masked_bias', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch: 0%| | 0/1 [00:00<?, ?it/s]
Iteration: 0%| | 0/16 [00:00<?, ?it/s]
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence
.
.
.
File "/path/to/anaconda3/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 15, in decorate_context
return func(*args, **kwargs)
File "/path/to/anaconda3/lib/python3.7/site-packages/transformers/generation_utils.py", line 480, in generate
model_specific_kwargs=model_specific_kwargs,
File "/path/to/anaconda3/lib/python3.7/site-packages/transformers/generation_utils.py", line 520, in _generate_no_beam_search
outputs = self(**model_inputs)
File "/path/to/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/path/to/ric-2020/text_gen_w_transformers/finetune_gpt2.py", line 33, in forward
File "/path/to/anaconda3/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 15, in decorate_context
return func(*args, **kwargs)
File "/path/to/anaconda3/lib/python3.7/site-packages/transformers/generation_utils.py", line 350, in generate
"Setting `pad_token_id` to {} (first `eos_token_id`) to generate sequence".format(eos_token_id)
.
.
.
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 1390, in warning
self._log(WARNING, msg, args, **kwargs)
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 1514, in _log
self.handle(record)
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 1524, in handle
self.callHandlers(record)
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 1594, in callHandlers
lastResort.handle(record)
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 894, in handle
self.emit(record)
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 1025, in emit
msg = self.format(record)
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 869, in format
return fmt.format(record)
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 608, in format
record.message = record.getMessage()
File "/path/to/anaconda3/lib/python3.7/logging/__init__.py", line 360, in getMessage
def getMessage(self):
RecursionError: maximum recursion depth exceeded while calling a Python object
My guess is that the self.generate() call inside forward() is what produces the recursion: generate() invokes the model, which dispatches back into forward(), which calls generate() again, and so on until Python's recursion limit is exceeded. Is it possible to use the functionality of the generate() method (beam search, top-k sampling, etc.) during fine-tuning without causing this recursion error?
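One workaround I've been considering (just a sketch, not tested) is to keep forward() limited to the standard LM loss and move generation into a separate method, so that generate() can call forward() without forward() calling back into generate(). sample_text is a name I made up:

import torch
from transformers import GPT2LMHeadModel

class GPT2FinetunedWithNgrams(GPT2LMHeadModel):
    def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
        # Delegate to GPT2LMHeadModel.forward for a differentiable LM loss;
        # with no self.generate() here, generate() can call this forward()
        # internally without re-entering itself.
        return super().forward(input_ids=input_ids,
                               attention_mask=attention_mask,
                               labels=labels)

    @torch.no_grad()
    def sample_text(self, input_ids, max_length=470):
        # Generation lives outside forward() and is only used at inference time.
        return self.generate(input_ids=input_ids, max_length=max_length)

For this to actually train, the collator would also have to supply real labels (e.g. batch['labels'] = batch['input_ids'].clone()) instead of None, since Trainer takes its loss from forward()'s output.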