UnimplementedError when using the summarization pipeline

Hi all,
I’m trying to write a simple text summarization script using the summarization pipeline.

Here’s my code:

# download the BBC text classification dataset
# (note: wget keeps the "?dl=0" query string in the saved filename)
! wget -nc "https://www.dropbox.com/s/7hb8bwbtjmxovlc/bbc_text_cls.csv?dl=0"

# install transformers
! pip install transformers

from transformers import pipeline
import textwrap
import numpy as np
import pandas as pd
from pprint import pprint

def wrap(x):
    # helper to pretty-print long articles
    return textwrap.fill(x, replace_whitespace=False, fix_sentence_endings=True)

# wget saved the file under the literal name 'bbc_text_cls.csv?dl=0'
df = pd.read_csv('bbc_text_cls.csv?dl=0')
df.head()

# pick one random business article, drop its headline (the first line),
# and summarize the body
doc = df[df.labels == 'business']['text'].sample(random_state=42)
summarizer = pipeline('summarization')
summarizer(doc.iloc[0].split('\n', 1)[1])
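
For what it’s worth, the traceback below comes from an equivalent run where I pasted the article text in directly and passed a few generation kwargs (article body elided here, it’s the full text from the dataset):

article = '''UK retail sales fell in December,
failing to meet expectations and making it by some counts the worst
...
trading until about Easter," said Mr Shaw.  "Our view is the Bank of
England will keep its powder dry and wait to see the big picture.'''

summarizer(article, max_length=120, min_length=30, do_sample=False)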

Here’s the full traceback:

UnimplementedError                        Traceback (most recent call last)
/Users/vu/Desktop/Transformer/transformers.ipynb Cell 5 line 4
      1 summarizer = pipeline('summarization')
      3 article = '''
      4 UK retail sales fell in December,
      5 failing to meet expectations and making it by some counts the worst
   (...)
     44 trading until about Easter," said Mr Shaw.  "Our view is the Bank of
     45 England will keep its powder dry and wait to see the big picture.'''
---> 47 summarizer(article, max_length = 120, min_length = 30, do_sample = False)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/pipelines/text2text_generation.py:265, in SummarizationPipeline.__call__(self, *args, **kwargs)
    241 def __call__(self, *args, **kwargs):
    242     r"""
    243     Summarize the text(s) given as inputs.
    244 
   (...)
    263           ids of the summary.
    264     """
--> 265     return super().__call__(*args, **kwargs)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/pipelines/text2text_generation.py:165, in Text2TextGenerationPipeline.__call__(self, *args, **kwargs)
    136 def __call__(self, *args, **kwargs):
    137     r"""
    138     Generate the output text(s) using text(s) given as inputs.
    139 
   (...)
    162           ids of the generated text.
    163     """
--> 165     result = super().__call__(*args, **kwargs)
    166     if (
    167         isinstance(args[0], list)
    168         and all(isinstance(el, str) for el in args[0])
    169         and all(len(res) == 1 for res in result)
    170     ):
    171         return [res[0] for res in result]

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/pipelines/base.py:1140, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
   1132     return next(
   1133         iter(
   1134             self.get_iterator(
   (...)
   1137         )
   1138     )
   1139 else:
-> 1140     return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/pipelines/base.py:1147, in Pipeline.run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
   1145 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
   1146     model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1147     model_outputs = self.forward(model_inputs, **forward_params)
   1148     outputs = self.postprocess(model_outputs, **postprocess_params)
   1149     return outputs

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/pipelines/base.py:1041, in Pipeline.forward(self, model_inputs, **forward_params)
   1039 if self.framework == "tf":
   1040     model_inputs["training"] = False
-> 1041     model_outputs = self._forward(model_inputs, **forward_params)
   1042 elif self.framework == "pt":
   1043     inference_context = self.get_inference_context()

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/pipelines/text2text_generation.py:187, in Text2TextGenerationPipeline._forward(self, model_inputs, **generate_kwargs)
    185 generate_kwargs["max_length"] = generate_kwargs.get("max_length", self.model.config.max_length)
    186 self.check_inputs(input_length, generate_kwargs["min_length"], generate_kwargs["max_length"])
--> 187 output_ids = self.model.generate(**model_inputs, **generate_kwargs)
    188 out_b = output_ids.shape[0]
    189 if self.framework == "pt":

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/generation/tf_utils.py:976, in TFGenerationMixin.generate(self, inputs, generation_config, logits_processor, seed, **kwargs)
    967     input_ids, model_kwargs = self._expand_inputs_for_generation(
    968         input_ids=input_ids,
    969         expand_size=generation_config.num_beams,
   (...)
    972         **model_kwargs,
    973     )
    975     # 12. run beam search
--> 976     return self.beam_search(
    977         input_ids,
    978         max_length=generation_config.max_length,
    979         pad_token_id=generation_config.pad_token_id,
    980         eos_token_id=generation_config.eos_token_id,
    981         length_penalty=generation_config.length_penalty,
    982         early_stopping=generation_config.early_stopping,
    983         logits_processor=logits_processor,
    984         output_scores=generation_config.output_scores,
    985         return_dict_in_generate=generation_config.return_dict_in_generate,
    986         num_return_sequences=generation_config.num_return_sequences,
    987         **model_kwargs,
    988     )
    990 elif is_beam_sample_gen_mode:
    991     if generation_config.num_beams < generation_config.num_return_sequences:

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/generation/tf_utils.py:2580, in TFGenerationMixin.beam_search(self, input_ids, do_sample, max_length, pad_token_id, eos_token_id, length_penalty, early_stopping, logits_processor, logits_warper, num_return_sequences, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, **model_kwargs)
   2567 # 2-to-n generation steps can then be run in autoregressive fashion (only in case 1st generation step does
   2568 # NOT yield EOS token though)
   2569 maximum_iterations = max_length - cur_len
   2570 (
   2571     cur_len,
   2572     running_sequences,
   2573     running_scores,
   2574     running_beam_indices,
   2575     sequences,
   2576     scores,
   2577     beam_indices,
   2578     is_sent_finished,
   2579     _,
-> 2580 ) = tf.while_loop(
   2581     beam_search_cond_fn,
   2582     beam_search_body_fn,
   2583     (
   2584         cur_len,
   2585         running_sequences,
   2586         running_scores,
   2587         running_beam_indices,
   2588         sequences,
   2589         scores,
   2590         beam_indices,
   2591         is_sent_finished,
   2592         model_kwargs,
   2593     ),
   2594     maximum_iterations=maximum_iterations,
   2595 )
   2597 # 6. prepare outputs
   2598 # Account for the edge-case where there are no finished sequences for a particular batch item. If so, return
   2599 # running sequences for that batch item.
   2600 none_finished = tf.math.reduce_any(is_sent_finished, axis=1)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/tensorflow/python/util/deprecation.py:629, in deprecated_arg_values.<locals>.deprecated_wrapper.<locals>.new_func(*args, **kwargs)
    622           _PRINTED_WARNING[(func, arg_name)] = True
    623         logging.warning(
    624             'From %s: calling %s (from %s) with %s=%s is deprecated and '
    625             'will be removed %s.\nInstructions for updating:\n%s',
    626             _call_location(), decorator_utils.get_qualified_name(func),
    627             func.__module__, arg_name, arg_value, 'in a future version'
    628             if date is None else ('after %s' % date), instructions)
--> 629 return func(*args, **kwargs)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/tensorflow/python/ops/control_flow_ops.py:2516, in while_loop_v2(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, maximum_iterations, name)
   2340 @tf_export("while_loop", v1=[])
   2341 @deprecation.deprecated_arg_values(
   2342     None,
   (...)
   2357                   maximum_iterations=None,
   2358                   name=None):
   2359   """Repeat `body` while the condition `cond` is true.
   2360 
   2361   `cond` is a callable returning a boolean scalar tensor. `body` is a callable
   (...)
   2514 
   2515   """
-> 2516   return while_loop(
   2517       cond=cond,
   2518       body=body,
   2519       loop_vars=loop_vars,
   2520       shape_invariants=shape_invariants,
   2521       parallel_iterations=parallel_iterations,
   2522       back_prop=back_prop,
   2523       swap_memory=swap_memory,
   2524       name=name,
   2525       maximum_iterations=maximum_iterations,
   2526       return_same_structure=True)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/tensorflow/python/ops/control_flow_ops.py:2765, in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations, return_same_structure)
   2762 loop_var_structure = nest.map_structure(type_spec.type_spec_from_value,
   2763                                         list(loop_vars))
   2764 while cond(*loop_vars):
-> 2765   loop_vars = body(*loop_vars)
   2766   if try_to_pack and not isinstance(loop_vars, (list, _basetuple)):
   2767     packed = True

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/tensorflow/python/ops/control_flow_ops.py:2756, in while_loop.<locals>.<lambda>(i, lv)
   2753     loop_vars = (counter, loop_vars)
   2754     cond = lambda i, lv: (  # pylint: disable=g-long-lambda
   2755         math_ops.logical_and(i < maximum_iterations, orig_cond(*lv)))
-> 2756     body = lambda i, lv: (i + 1, orig_body(*lv))
   2757   try_to_pack = False
   2759 if executing_eagerly:

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/generation/tf_utils.py:2351, in TFGenerationMixin.beam_search.<locals>.beam_search_body_fn(cur_len, running_sequences, running_scores, running_beam_indices, sequences, scores, beam_indices, is_sent_finished, model_kwargs)
   2347     input_ids = tf.expand_dims(running_sequences[:, :, cur_len - 1], -1)
   2348 model_inputs = self.prepare_inputs_for_generation(
   2349     flatten_beam_dim(input_ids), use_cache=use_cache, **model_kwargs
   2350 )
-> 2351 model_outputs = self(
   2352     **model_inputs,
   2353     return_dict=True,
   2354     output_attentions=output_attentions,
   2355     output_hidden_states=output_hidden_states,
   2356 )
   2357 logits = unflatten_beam_dim(model_outputs.logits[:, -1], num_beams)
   2359 # 2. Compute log probs
   2360 # get log probabilities from logits, process logits with processors (*e.g.* min_length, ...), and
   2361 # add new logprobs to existing running logprobs scores.

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67     filtered_tb = _process_traceback_frames(e.__traceback__)
     68     # To get the full stack trace, call:
     69     # `tf.debugging.disable_traceback_filtering()`
---> 70     raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/modeling_tf_utils.py:426, in unpack_inputs.<locals>.run_call_with_unpacked_inputs(self, *args, **kwargs)
    423     config = self.config
    425 unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
--> 426 return func(self, **unpacked_inputs)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/models/t5/modeling_tf_t5.py:1358, in TFT5ForConditionalGeneration.call(self, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, head_mask, decoder_head_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, training)
   1355     decoder_input_ids = self._shift_right(labels)
   1357 # Decode
-> 1358 decoder_outputs = self.decoder(
   1359     decoder_input_ids,
   1360     attention_mask=decoder_attention_mask,
   1361     encoder_hidden_states=hidden_states,
   1362     encoder_attention_mask=attention_mask,
   1363     inputs_embeds=decoder_inputs_embeds,
   1364     head_mask=decoder_head_mask,
   1365     past_key_values=past_key_values,
   1366     use_cache=use_cache,
   1367     output_attentions=output_attentions,
   1368     output_hidden_states=output_hidden_states,
   1369     return_dict=return_dict,
   1370     training=training,
   1371 )
   1373 sequence_output = decoder_outputs[0]
   1375 # T5v1.1 does not tie output word embeddings and thus does not require downscaling

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/modeling_tf_utils.py:426, in unpack_inputs.<locals>.run_call_with_unpacked_inputs(self, *args, **kwargs)
    423     config = self.config
    425 unpacked_inputs = input_processing(func, config, **fn_args_and_kwargs)
--> 426 return func(self, **unpacked_inputs)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/models/t5/modeling_tf_t5.py:778, in TFT5MainLayer.call(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, encoder_head_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, training)
    776 if output_hidden_states:
    777     all_hidden_states = all_hidden_states + (hidden_states,)
--> 778 layer_outputs = layer_module(
    779     hidden_states,
    780     attention_mask=extended_attention_mask,
    781     position_bias=position_bias,
    782     encoder_hidden_states=encoder_hidden_states,
    783     encoder_attention_mask=encoder_extended_attention_mask,
    784     encoder_decoder_position_bias=encoder_decoder_position_bias,
    785     layer_head_mask=head_mask[idx] if head_mask is not None else None,
    786     encoder_layer_head_mask=encoder_head_mask[idx] if encoder_head_mask is not None else None,
    787     past_key_value=past_key_value,
    788     use_cache=use_cache,
    789     output_attentions=output_attentions,
    790     training=training,
    791 )
    793 # layer_outputs is a tuple with:
    794 # hidden-states, key-value-states, (self-attention weights), (self-attention position bias), (cross-attention weights), (cross-attention position bias)
    795 hidden_states, present_key_value_state = layer_outputs[:2]

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/models/t5/modeling_tf_t5.py:567, in TFT5Block.call(self, hidden_states, attention_mask, position_bias, encoder_hidden_states, encoder_attention_mask, encoder_decoder_position_bias, layer_head_mask, encoder_layer_head_mask, past_key_value, use_cache, output_attentions, training)
    564 else:
    565     self_attn_past_key_value, cross_attn_past_key_value = None, None
--> 567 self_attention_outputs = self.layer[0](
    568     hidden_states,
    569     attention_mask=attention_mask,
    570     position_bias=position_bias,
    571     layer_head_mask=layer_head_mask,
    572     past_key_value=self_attn_past_key_value,
    573     use_cache=use_cache,
    574     output_attentions=output_attentions,
    575     training=training,
    576 )
    577 hidden_states, present_key_value_state = self_attention_outputs[:2]
    578 attention_outputs = self_attention_outputs[2:]  # Keep self-attention outputs and relative position weights

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/models/t5/modeling_tf_t5.py:457, in TFT5LayerSelfAttention.call(self, hidden_states, attention_mask, position_bias, layer_head_mask, past_key_value, use_cache, output_attentions, training)
    445 def call(
    446     self,
    447     hidden_states,
   (...)
    454     training=False,
    455 ):
    456     normed_hidden_states = self.layer_norm(hidden_states)
--> 457     attention_output = self.SelfAttention(
    458         normed_hidden_states,
    459         mask=attention_mask,
    460         position_bias=position_bias,
    461         layer_head_mask=layer_head_mask,
    462         past_key_value=past_key_value,
    463         use_cache=use_cache,
    464         output_attentions=output_attentions,
    465         training=training,
    466     )
    467     hidden_states = hidden_states + self.dropout(attention_output[0], training=training)
    468     outputs = (hidden_states,) + attention_output[1:]  # add attentions if we output them

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/transformers/models/t5/modeling_tf_t5.py:396, in TFT5Attention.call(self, hidden_states, mask, key_value_states, position_bias, past_key_value, layer_head_mask, query_length, use_cache, training, output_attentions)
    392     else:
    393         # we might have a padded past structure, in which case we want to fetch the position bias slice
    394         # right after the most recently filled past index
    395         most_recently_filled_past_index = tf.reduce_max(tf.where(past_key_value[0][0, 0, :, 0] != 0.0))
--> 396         position_bias = dynamic_slice(
    397             position_bias,
    398             (0, 0, most_recently_filled_past_index + 1, 0),
    399             (1, self.n_heads, seq_length, real_seq_length),
    400         )
    402 if mask is not None:
    403     position_bias = tf.cast(position_bias, dtype=mask.dtype)

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/tensorflow/compiler/tf2xla/ops/gen_xla_ops.py:1356, in xla_dynamic_slice(input, start_indices, size_indices, name)
   1354   if _result is not NotImplemented:
   1355     return _result
-> 1356   return xla_dynamic_slice_eager_fallback(
   1357       input, start_indices, size_indices, name=name, ctx=_ctx)
   1358 except _core._SymbolicException:
   1359   pass  # Add nodes to the TensorFlow graph.

File ~/opt/anaconda3/envs/Transformers/lib/python3.9/site-packages/tensorflow/compiler/tf2xla/ops/gen_xla_ops.py:1408, in xla_dynamic_slice_eager_fallback(input, start_indices, size_indices, name, ctx)
   1406 _inputs_flat = [input, start_indices, size_indices]
   1407 _attrs = ("T", _attr_T, "Tindices", _attr_Tindices)
-> 1408 _result = _execute.execute(b"XlaDynamicSlice", 1, inputs=_inputs_flat,
   1409                            attrs=_attrs, ctx=ctx, name=name)
   1410 if _execute.must_record_gradient():
   1411   _execute.record_gradient(
   1412       "XlaDynamicSlice", _inputs_flat, _attrs, _result)

UnimplementedError: Exception encountered when calling layer 'SelfAttention' (type TFT5Attention).

{{function_node __wrapped__XlaDynamicSlice_device_/job:localhost/replica:0/task:0/device:CPU:0}} Could not find compiler for platform Host: NOT_FOUND: could not find registered compiler for platform Host -- was support for that platform linked in? [Op:XlaDynamicSlice]

Call arguments received by layer 'SelfAttention' (type TFT5Attention):
  • hidden_states=tf.Tensor(shape=(4, 1, 512), dtype=float32)
  • mask=tf.Tensor(shape=(4, 1, 1, 2), dtype=float32)
  • key_value_states=None
  • position_bias=None
  • past_key_value=('tf.Tensor(shape=(4, 8, 1, 64), dtype=float32)', 'tf.Tensor(shape=(4, 8, 1, 64), dtype=float32)')
  • layer_head_mask=None
  • query_length=None
  • use_cache=True
  • training=False
  • output_attentions=False

I’m not sure how to handle this error. I’m running this on a Mac M1, and the failing op is XlaDynamicSlice on the CPU ("could not find registered compiler for platform Host"), which makes me suspect the macOS TensorFlow build doesn’t support XLA.
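
From the stack trace the pipeline clearly took the TensorFlow path (TFT5ForConditionalGeneration, tf_utils.py), so my guess is that torch isn’t installed in this env and the pipeline fell back to its TF default checkpoint. Would forcing the PyTorch backend sidestep the TF beam-search/XLA path? A minimal sketch of what I have in mind (untested on my machine, and the model name is just what I believe the pipeline’s PyTorch default to be):

# hypothetical workaround: force the PyTorch backend so generation
# avoids TF beam search and its XlaDynamicSlice op
# (requires: pip install torch)
from transformers import pipeline

summarizer = pipeline(
    'summarization',
    model='sshleifer/distilbart-cnn-12-6',  # assumed default PyTorch summarization checkpoint
    framework='pt',  # use PyTorch instead of TensorFlow
)
# same call as before, on the article body from the dataframe
summarizer(doc.iloc[0].split('\n', 1)[1], max_length=120, min_length=30, do_sample=False)

Does that seem like the right direction, or is there a way to make the TF backend work on this machine?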
Any feedback is appreciated!