Hi everyone!
I am trying to load a model whose card provides instructions for PyTorch into TensorFlow: tuner007/pegasus_paraphrase (Hugging Face).
(I have an M1 machine with accelerators for TensorFlow and so far none for PyTorch, so most of my work is in TensorFlow.)
I tested the PyTorch code provided in the model card and it works without issues:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# Hugging Face Hub checkpoint used for paraphrasing.
model_name = 'tuner007/pegasus_paraphrase'

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

# Load the tokenizer and the model, then place the model on the chosen device.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)
def get_response(input_text, num_return_sequences, num_beams):
    """Return paraphrases of ``input_text`` generated with the PyTorch model.

    Args:
        input_text: Sentence to paraphrase.
        num_return_sequences: Value forwarded to ``model.generate`` as
            ``num_return_sequences``.
        num_beams: Value forwarded to ``model.generate`` as ``num_beams``.

    Returns:
        The output of ``tokenizer.batch_decode`` for the generated ids, with
        special tokens skipped.
    """
    # Tokenize as a batch of one, truncated to 60 tokens, and move the
    # encoded tensors to the same device as the model.
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(torch_device)
    translated = model.generate(
        **batch,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text
# Generation settings forwarded to get_response.
num_beams = 10
num_return_sequences = 5
# Sentence to paraphrase.
context = "The ultimate test of your knowledge is your capacity to convey it to another."
get_response(context,num_return_sequences,num_beams)
So I’m trying to modify all of this for TensorFlow and got this far:
from transformers import TFPegasusForConditionalGeneration, PegasusTokenizer
# Same Hub checkpoint as the PyTorch example.
tf_model_name = 'tuner007/pegasus_paraphrase'
# return_tensors / max_length / truncation are per-call tokenizer options, not
# loading options, so they are passed where the tokenizer is called, not here.
tf_tokenizer = PegasusTokenizer.from_pretrained(tf_model_name)
# from_pt=True loads the checkpoint's PyTorch weights into the TensorFlow model.
tf_model = TFPegasusForConditionalGeneration.from_pretrained(tf_model_name, from_pt=True)
def get_response_tf(input_text, num_return_sequences, num_beams):
    """Return paraphrases of ``input_text`` generated with the TensorFlow model.

    Args:
        input_text: Sentence to paraphrase.
        num_return_sequences: Value forwarded to ``tf_model.generate`` as
            ``num_return_sequences``.
        num_beams: Value forwarded to ``tf_model.generate`` as ``num_beams``.

    Returns:
        The output of ``tf_tokenizer.batch_decode`` for the generated ids,
        with special tokens skipped.
    """
    # Tokenize as a batch of one, truncated to 60 tokens, returning TF tensors.
    batch = tf_tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="tf",
    )
    translated = tf_model.generate(
        **batch,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )
    tgt_text = tf_tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text
# Generation settings forwarded to get_response_tf.
num_beams = 10
num_return_sequences = 5
# Sentence to paraphrase.
context = "The ultimate test of your knowledge is your capacity to convey it to another."
# This is the call that raises the InvalidArgumentError reported below.
get_response_tf(context,num_return_sequences,num_beams)
However when I run it I get this error:
InvalidArgumentError: slice index -22336 of dimension 0 out of bounds. [Op:StridedSlice] name: strided_slice/
Am I missing a step in the conversion?
FULL ERROR TEXT:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
Input In [80], in <cell line: 4>()
2 num_return_sequences = 5
3 context = "The ultimate test of your knowledge is your capacity to convey it to another."
----> 4 get_response_tf(context,num_return_sequences,num_beams)
Input In [79], in get_response_tf(input_text, num_return_sequences, num_beams)
1 def get_response_tf(input_text,num_return_sequences,num_beams):
2 batch = tf_tokenizer([input_text],truncation=True,padding='longest',max_length=60, return_tensors="tf")
----> 3 translated = tf_model.generate(**batch,max_length=60,num_beams=num_beams, num_return_sequences=num_return_sequences, temperature=1.5)
4 tgt_text = tf_tokenizer.batch_decode(translated, skip_special_tokens=True)
5 return tgt_text
File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/transformers/generation_tf_utils.py:736, in TFGenerationMixin.generate(self, input_ids, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, num_return_sequences, attention_mask, decoder_start_token_id, use_cache, output_scores, output_attentions, output_hidden_states, return_dict_in_generate, forced_bos_token_id, forced_eos_token_id, **model_kwargs)
731 assert (
732 cur_len < max_length
733 ), f"The context has {cur_len} number of tokens, but `max_length` is only {max_length}. Please make sure that `max_length` is bigger than the number of tokens, by setting either `generate(max_length=...,...)` or `config.max_length = ...`"
735 if num_beams > 1:
--> 736 output = self._generate_beam_search(
737 input_ids,
738 cur_len=cur_len,
739 max_length=max_length,
740 min_length=min_length,
741 do_sample=do_sample,
742 early_stopping=early_stopping,
743 temperature=temperature,
744 top_k=top_k,
745 top_p=top_p,
746 repetition_penalty=repetition_penalty,
747 no_repeat_ngram_size=no_repeat_ngram_size,
748 bad_words_ids=bad_words_ids,
749 pad_token_id=pad_token_id,
750 eos_token_id=eos_token_id,
751 batch_size=effective_batch_size,
752 num_return_sequences=num_return_sequences,
753 length_penalty=length_penalty,
754 num_beams=num_beams,
755 vocab_size=vocab_size,
756 encoder_outputs=encoder_outputs,
757 attention_mask=attention_mask,
758 use_cache=use_cache,
759 forced_bos_token_id=forced_bos_token_id,
760 forced_eos_token_id=forced_eos_token_id,
761 return_dict_in_generate=return_dict_in_generate,
762 **model_kwargs,
763 )
764 else:
765 output = self._generate_no_beam_search(
766 input_ids,
767 cur_len=cur_len,
(...)
785 **model_kwargs,
786 )
File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/transformers/generation_tf_utils.py:1307, in TFGenerationMixin._generate_beam_search(self, input_ids, cur_len, max_length, min_length, do_sample, early_stopping, temperature, top_k, top_p, repetition_penalty, no_repeat_ngram_size, bad_words_ids, pad_token_id, eos_token_id, batch_size, num_return_sequences, length_penalty, num_beams, vocab_size, encoder_outputs, attention_mask, use_cache, forced_bos_token_id, forced_eos_token_id, return_dict_in_generate, **kwargs)
1304 beam_idx = tf.convert_to_tensor([x[2] for x in next_batch_beam], dtype=tf.int32)
1306 # re-order batch and update current length
-> 1307 input_ids = tf.stack([tf.identity(input_ids[x, :]) for x in beam_idx])
1308 input_ids = tf.concat([input_ids, tf.expand_dims(beam_tokens, 1)], axis=-1)
1309 cur_len = cur_len + 1
File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/transformers/generation_tf_utils.py:1307, in <listcomp>(.0)
1304 beam_idx = tf.convert_to_tensor([x[2] for x in next_batch_beam], dtype=tf.int32)
1306 # re-order batch and update current length
-> 1307 input_ids = tf.stack([tf.identity(input_ids[x, :]) for x in beam_idx])
1308 input_ids = tf.concat([input_ids, tf.expand_dims(beam_tokens, 1)], axis=-1)
1309 cur_len = cur_len + 1
File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
151 except Exception as e:
152 filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153 raise e.with_traceback(filtered_tb) from None
154 finally:
155 del filtered_tb
File ~/miniforge3/envs/ml1/lib/python3.9/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
7105 def raise_from_not_ok_status(e, name):
7106 e.message += (" name: " + name if name is not None else "")
-> 7107 raise core._status_to_exception(e) from None
InvalidArgumentError: slice index -22336 of dimension 0 out of bounds. [Op:StridedSlice] name: strided_slice/