Error on later checkpoint when doing generation using TextGenerationPipeline

Hi,

I am training my own GPT-2 model with a BERT tokenizer to handle Chinese characters.
I plan to train the model for 100 epochs, saving a checkpoint every 500 steps.
However, when I try to run generation tests on the model using the TextGenerationPipeline, an error is raised starting from checkpoint-30000:

RuntimeError: probability tensor contains either `inf`, `nan` or element < 0

The TextGenerationPipeline works fine for checkpoints before 30000, and I can't figure out the reason for this error.
Here is my code snippet for text generation:

from transformers import BertTokenizer, GPT2LMHeadModel
tokenizer = BertTokenizer(vocab_file='./vocab.txt')
finetune_model = GPT2LMHeadModel.from_pretrained("model/pretrained/checkpoint-30000")
from transformers import TextGenerationPipeline, Text2TextGenerationPipeline
input_text = "你 好 嗎"
text_generator = TextGenerationPipeline(finetune_model, tokenizer)   
generated_text = text_generator([input_text], max_length=128, do_sample=True)

Traceback:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_5991/3783021088.py in <module>
      2 input_text = "你 好 嗎"
      3 text_generator = TextGenerationPipeline(finetune_model, tokenizer)
----> 4 generated_text = text_generator([input_text], max_length=128, do_sample=True)

../lib/python3.7/site-packages/transformers/pipelines/text_generation.py in __call__(self, text_inputs, **kwargs)
    148               -- The token ids of the generated text.
    149         """
--> 150         return super().__call__(text_inputs, **kwargs)
    151 
    152     def preprocess(self, prompt_text, prefix=""):

../lib/python3.7/site-packages/transformers/pipelines/base.py in __call__(self, inputs, num_workers, *args, **kwargs)
    915                     inputs, num_workers, preprocess_params, forward_params, postprocess_params
    916                 )
--> 917                 outputs = [output for output in final_iterator]
    918                 return outputs
    919             else:

../lib/python3.7/site-packages/transformers/pipelines/base.py in <listcomp>(.0)
    915                     inputs, num_workers, preprocess_params, forward_params, postprocess_params
    916                 )
--> 917                 outputs = [output for output in final_iterator]
    918                 return outputs
    919             else:

../lib/python3.7/site-packages/transformers/pipelines/base.py in __next__(self)
    631 
    632         def __next__(self):
--> 633             item = next(self.iterator)
    634             processed = self.infer(item, **self.params)
    635             return processed

../lib/python3.7/site-packages/transformers/pipelines/base.py in __next__(self)
    632         def __next__(self):
    633             item = next(self.iterator)
--> 634             processed = self.infer(item, **self.params)
    635             return processed
    636 

../lib/python3.7/site-packages/transformers/pipelines/base.py in forward(self, model_inputs, **forward_params)
    878                 with inference_context():
    879                     model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
--> 880                     model_outputs = self._forward(model_inputs, **forward_params)
    881                     model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
    882             else:

../lib/python3.7/site-packages/transformers/pipelines/text_generation.py in _forward(self, model_inputs, **generate_kwargs)
    163             input_ids = None
    164         prompt_text = model_inputs.pop("prompt_text")
--> 165         generated_sequence = self.model.generate(input_ids=input_ids, **generate_kwargs)  # BS x SL
    166         return {"generated_sequence": generated_sequence, "input_ids": input_ids, "prompt_text": prompt_text}
    167 

../lib/python3.7/site-packages/torch/autograd/grad_mode.py in decorate_context(*args, **kwargs)
     26         def decorate_context(*args, **kwargs):
     27             with self.__class__():
---> 28                 return func(*args, **kwargs)
     29         return cast(F, decorate_context)
     30 

../lib/python3.7/site-packages/transformers/generation_utils.py in generate(self, input_ids, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, encoder_no_repeat_ngram_size, num_return_sequences, max_time, max_new_tokens, decoder_start_token_id, use_cache, num_beam_groups, diversity_penalty, prefix_allowed_tokens_fn, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, forced_bos_token_id, forced_eos_token_id, remove_invalid_values, synced_gpus, **model_kwargs)
   1025                 return_dict_in_generate=return_dict_in_generate,
   1026                 synced_gpus=synced_gpus,
-> 1027                 **model_kwargs,
   1028             )
   1029 

../lib/python3.7/site-packages/transformers/generation_utils.py in sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, **model_kwargs)
   1566             # sample
   1567             probs = nn.functional.softmax(next_token_scores, dim=-1)
-> 1568             next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
   1569 
   1570             # finished sentences should have their next token be a padding token

RuntimeError: probability tensor contains either `inf`, `nan` or element < 0