Converting a PyTorch model to TensorFlow

Hi everyone!

I am trying to load a model whose card only gives PyTorch instructions into TensorFlow: tuner007/pegasus_paraphrase (https://huggingface.co/tuner007/pegasus_paraphrase).

(I'm on an M1 Mac with GPU acceleration available for TensorFlow but, so far, none for PyTorch, so most of my work is in TensorFlow.)

I tested the code provided in the model card with PyTorch and it works without issues:

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

model_name = 'tuner007/pegasus_paraphrase'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)

def get_response(input_text, num_return_sequences, num_beams):
  # Tokenize the input, beam-search for paraphrases, and decode them back to text
  batch = tokenizer([input_text], truncation=True, padding='longest', max_length=60, return_tensors="pt").to(torch_device)
  translated = model.generate(**batch, max_length=60, num_beams=num_beams, num_return_sequences=num_return_sequences, temperature=1.5)
  tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
  return tgt_text

num_beams = 10
num_return_sequences = 5
context = "The ultimate test of your knowledge is your capacity to convey it to another."
get_response(context, num_return_sequences, num_beams)

So I'm trying to adapt all of this to TensorFlow and got this far:

from transformers import TFPegasusForConditionalGeneration, PegasusTokenizer

tf_model_name = 'tuner007/pegasus_paraphrase'
# Per-call options like truncation/max_length/return_tensors belong in the
# tokenizer call inside get_response_tf below, not in from_pretrained
tf_tokenizer = PegasusTokenizer.from_pretrained(tf_model_name)
# from_pt=True converts the PyTorch checkpoint to TF weights at load time
tf_model = TFPegasusForConditionalGeneration.from_pretrained(tf_model_name, from_pt=True)

def get_response_tf(input_text, num_return_sequences, num_beams):
  # Same pipeline as the PyTorch version, only with TF tensors and no device move
  batch = tf_tokenizer([input_text], truncation=True, padding='longest', max_length=60, return_tensors="tf")
  translated = tf_model.generate(**batch, max_length=60, num_beams=num_beams, num_return_sequences=num_return_sequences, temperature=1.5)
  tgt_text = tf_tokenizer.batch_decode(translated, skip_special_tokens=True)
  return tgt_text

num_beams = 10
num_return_sequences = 5
context = "The ultimate test of your knowledge is your capacity to convey it to another."
get_response_tf(context, num_return_sequences, num_beams)
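
As an aside, once this works I'd also like to avoid re-running the PyTorch-to-TF conversion on every load. As far as I understand, save_pretrained should cover that; a minimal sketch, where './pegasus_paraphrase_tf' is just a hypothetical local path:

# Save the converted TF weights (and the tokenizer) once...
tf_model.save_pretrained('./pegasus_paraphrase_tf')
tf_tokenizer.save_pretrained('./pegasus_paraphrase_tf')
# ...then later loads don't need from_pt=True anymore
tf_model = TFPegasusForConditionalGeneration.from_pretrained('./pegasus_paraphrase_tf')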

However, when I run get_response_tf I get this error:
InvalidArgumentError: slice index -22336 of dimension 0 out of bounds. [Op:StridedSlice] name: strided_slice/

Am I missing a step in the conversion?
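
The traceback below points into _generate_beam_search, so as a sanity check I also want to try greedy decoding, which takes the _generate_no_beam_search branch instead. A minimal check, reusing the objects above:

# If this succeeds, the converted weights are probably fine and the problem
# is specific to TF beam search rather than to the conversion itself
batch = tf_tokenizer([context], truncation=True, padding='longest', max_length=60, return_tensors="tf")
greedy = tf_model.generate(**batch, max_length=60, num_beams=1, num_return_sequences=1)
print(tf_tokenizer.batch_decode(greedy, skip_special_tokens=True))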

FULL ERROR TEXT:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Input In [80], in <cell line: 4>()
      2 num_return_sequences = 5
      3 context = "The ultimate test of your knowledge is your capacity to convey it to another."
----> 4 get_response_tf(context,num_return_sequences,num_beams)

Input In [79], in get_response_tf(input_text, num_return_sequences, num_beams)
      1 def get_response_tf(input_text,num_return_sequences,num_beams):
      2   batch = tf_tokenizer([input_text],truncation=True,padding='longest',max_length=60, return_tensors="tf")
----> 3   translated = tf_model.generate(**batch,max_length=60,num_beams=num_beams, num_return_sequences=num_return_sequences, temperature=1.5)
      4   tgt_text = tf_tokenizer.batch_decode(translated, skip_special_tokens=True)
      5   return tgt_text

File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/transformers/generation_tf_utils.py:736, in TFGenerationMixin.generate(self, input_ids, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, num_return_sequences, attention_mask, decoder_start_token_id, use_cache, output_scores, output_attentions, output_hidden_states, return_dict_in_generate, forced_bos_token_id, forced_eos_token_id, **model_kwargs)
    731 assert (
    732     cur_len < max_length
    733 ), f"The context has {cur_len} number of tokens, but `max_length` is only {max_length}. Please make sure that `max_length` is bigger than the number of tokens, by setting either `generate(max_length=...,...)` or `config.max_length = ...`"
    735 if num_beams > 1:
--> 736     output = self._generate_beam_search(
    737         input_ids,
    738         cur_len=cur_len,
    739         max_length=max_length,
    740         min_length=min_length,
    741         do_sample=do_sample,
    742         early_stopping=early_stopping,
    743         temperature=temperature,
    744         top_k=top_k,
    745         top_p=top_p,
    746         repetition_penalty=repetition_penalty,
    747         no_repeat_ngram_size=no_repeat_ngram_size,
    748         bad_words_ids=bad_words_ids,
    749         pad_token_id=pad_token_id,
    750         eos_token_id=eos_token_id,
    751         batch_size=effective_batch_size,
    752         num_return_sequences=num_return_sequences,
    753         length_penalty=length_penalty,
    754         num_beams=num_beams,
    755         vocab_size=vocab_size,
    756         encoder_outputs=encoder_outputs,
    757         attention_mask=attention_mask,
    758         use_cache=use_cache,
    759         forced_bos_token_id=forced_bos_token_id,
    760         forced_eos_token_id=forced_eos_token_id,
    761         return_dict_in_generate=return_dict_in_generate,
    762         **model_kwargs,
    763     )
    764 else:
    765     output = self._generate_no_beam_search(
    766         input_ids,
    767         cur_len=cur_len,
   (...)
    785         **model_kwargs,
    786     )

File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/transformers/generation_tf_utils.py:1307, in TFGenerationMixin._generate_beam_search(self, input_ids, cur_len, max_length, min_length, do_sample, early_stopping, temperature, top_k, top_p, repetition_penalty, no_repeat_ngram_size, bad_words_ids, pad_token_id, eos_token_id, batch_size, num_return_sequences, length_penalty, num_beams, vocab_size, encoder_outputs, attention_mask, use_cache, forced_bos_token_id, forced_eos_token_id, return_dict_in_generate, **kwargs)
   1304 beam_idx = tf.convert_to_tensor([x[2] for x in next_batch_beam], dtype=tf.int32)
   1306 # re-order batch and update current length
-> 1307 input_ids = tf.stack([tf.identity(input_ids[x, :]) for x in beam_idx])
   1308 input_ids = tf.concat([input_ids, tf.expand_dims(beam_tokens, 1)], axis=-1)
   1309 cur_len = cur_len + 1

File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/transformers/generation_tf_utils.py:1307, in <listcomp>(.0)
   1304 beam_idx = tf.convert_to_tensor([x[2] for x in next_batch_beam], dtype=tf.int32)
   1306 # re-order batch and update current length
-> 1307 input_ids = tf.stack([tf.identity(input_ids[x, :]) for x in beam_idx])
   1308 input_ids = tf.concat([input_ids, tf.expand_dims(beam_tokens, 1)], axis=-1)
   1309 cur_len = cur_len + 1

File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
   7105 def raise_from_not_ok_status(e, name):
   7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None

InvalidArgumentError: slice index -22336 of dimension 0 out of bounds. [Op:StridedSlice] name: strided_slice/