Using TFBertTokenizer with tf.data.Dataset

Hi there. I’m working on a model that combines the “bert-base-uncased” model with a custom decoder that I’m writing myself. I’m running into issues passing my training data to the TFBertTokenizer. The training data being passed to the tokenizer is a scalar tensor of type tf.string.

training_data.element_spec
# ((TensorSpec(shape=(), dtype=tf.string, name=None),
  TensorSpec(shape=<unknown>, dtype=tf.int64, name=None)),
 TensorSpec(shape=<unknown>, dtype=tf.int64, name=None))

I then create my BertEncoder…

class BertEncoder(layers.Layer):
    """Keras layer that tokenizes raw text and runs it through a pretrained BERT model.

    Both the tokenizer and the model are loaded from the "bert-base-uncased"
    checkpoint.
    """
    def __init__(self, projection_dim):
        # NOTE(review): projection_dim is accepted but not used in the code shown here.
        super().__init__()
        self.tokenizer = TFBertTokenizer.from_pretrained("bert-base-uncased")
        self.bert = TFBertModel.from_pretrained("bert-base-uncased")

    def call(self, input):
        """Tokenize `input` and return the output of the BERT model on it."""
        # This tokenizer call is where the AttributeError in the traceback below
        # originates; the traceback reports `input` as a scalar (shape=()) string tensor.
        x = self.tokenizer(input)
        x = self.bert(x)
        return x

Wrap it in my larger model…

class MyCustomModel(keras.Model):
    """Model that pairs the BERT-based encoder with a custom decoder."""
    def __init__(self, decoder_stack_height, d_model, h_model, decoder_dropout_rate, n_decoder_vocab):
        # NOTE(review): n_decoder_vocab is not used in the code shown; presumably it is
        # consumed by the elided "additional layers" -- confirm.
        super().__init__()
        # d_model is passed as BertEncoder's projection_dim argument.
        self.encoder = BertEncoder(d_model)
        self.decoder = MyCustomDecoder(decoder_stack_height,
                                       d_model,
                                       h_model,
                                       decoder_dropout_rate)
        # create some additional layers

    def call(self, input):
        """Unpack the (text, series) input pair and encode the text with the encoder."""
        # input is a tuple of two tensors (one element of the dataset, not the dataset itself):
        # per the traceback below, a scalar `tf.string` tensor and a `tf.int64` tensor respectively
        bill_text, prebill_series = input
        context = self.encoder(bill_text)
        # do some more stuff with context to create x
        return x

Then I call model.fit

# Build the model (constructor arguments elided) and train it for 5 epochs.
my_model = MyCustomModel(...)
# NOTE(review): assuming train_data is the dataset whose element_spec is shown
# earlier, its string component is a scalar (shape=()), i.e. the dataset looks
# unbatched -- presumably it needs `.batch(...)` before being passed to fit();
# confirm. The Keras fit() docs say not to specify batch_size when the input is
# a tf.data.Dataset, since datasets are expected to yield batches themselves.
my_model.fit(train_data, batch_size=32, epochs=5, verbose=2)

The error I then get is:

AttributeError                            Traceback (most recent call last)
Cell In[48], line 1
----> 1 my_model.fit(train_data, batch_size=32, epochs=5, verbose=2)

File ~/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67     filtered_tb = _process_traceback_frames(e.__traceback__)
     68     # To get the full stack trace, call:
     69     # `tf.debugging.disable_traceback_filtering()`
---> 70     raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file359vbblq.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
     13 try:
     14     do_return = True
---> 15     retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
     16 except:
     17     do_return = False

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filel3wen_qt.py:11, in outer_factory.<locals>.inner_factory.<locals>.tf__call(self, input)
      9 retval_ = ag__.UndefinedReturnValue()
     10 (bill_text, prebill_series) = ag__.ld(input)
---> 11 context = ag__.converted_call(ag__.ld(self).encoder, (ag__.ld(bill_text),), None, fscope)
     12 x = ag__.converted_call(ag__.ld(self).embedding, (ag__.ld(prebill_series),), None, fscope)
     13 x = ag__.ld(x) + ag__.converted_call(ag__.ld(self).positional_encoding, (ag__.ld(x),), None, fscope)

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileexf83jen.py:10, in outer_factory.<locals>.inner_factory.<locals>.tf__call(self, input)
      8 do_return = False
      9 retval_ = ag__.UndefinedReturnValue()
---> 10 x = ag__.converted_call(ag__.ld(self).tokenizer, (ag__.ld(input),), None, fscope)
     11 x = ag__.converted_call(ag__.ld(self).bert, (ag__.ld(x),), None, fscope)
     12 x = ag__.converted_call(ag__.ld(self).broadcaster, (ag__.ld(x).last_hidden_state,), None, fscope)

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py:44, in outer_factory.<locals>.inner_factory.<locals>.tf__error_handler(*args, **kwargs)
     42         filtered_tb = ag__.Undefined('filtered_tb')
     43 filtered_tb = ag__.Undefined('filtered_tb')
---> 44 ag__.if_stmt(ag__.not_(ag__.converted_call(ag__.ld(tf).debugging.is_traceback_filtering_enabled, (), None, fscope)), if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
     45 return fscope.ret(retval_, do_return)

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py:40, in outer_factory.<locals>.inner_factory.<locals>.tf__error_handler.<locals>.else_body()
     38 except Exception as e:
     39     filtered_tb = ag__.converted_call(ag__.ld(_process_traceback_frames), (ag__.ld(e).__traceback__,), None, fscope)
---> 40     raise ag__.converted_call(ag__.ld(e).with_traceback, (ag__.ld(filtered_tb),), None, fscope) from None
     41 finally:
     42     filtered_tb = ag__.Undefined('filtered_tb')

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py:34, in outer_factory.<locals>.inner_factory.<locals>.tf__error_handler.<locals>.else_body()
     32 try:
     33     do_return = True
---> 34     retval_ = ag__.converted_call(ag__.ld(fn), tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope)
     35 except:
     36     do_return = False

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileismqzjpj.py:242, in outer_factory.<locals>.inner_factory.<locals>.tf____call__(self, *args, **kwargs)
    240 call_context = ag__.Undefined('call_context')
    241 namescope_stack = ag__.Undefined('namescope_stack')
--> 242 ag__.if_stmt(ag__.converted_call(ag__.ld(_in_functional_construction_mode), (ag__.ld(self), ag__.ld(inputs), ag__.ld(args), ag__.ld(kwargs), ag__.ld(input_list)), None, fscope), if_body_11, else_body_11, get_state_11, set_state_11, ('do_return', "kwargs['mask']", 'retval_', 'args', 'input_list', 'inputs', 'kwargs'), 3)
    243 return fscope.ret(retval_, do_return)

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileismqzjpj.py:187, in outer_factory.<locals>.inner_factory.<locals>.tf____call__.<locals>.else_body_11()
    185 ag__.if_stmt(ag__.ld(self)._autocast, if_body_7, else_body_7, get_state_7, set_state_7, ('inputs',), 1)
    186 with ag__.ld(autocast_variable).enable_auto_cast_variables(ag__.ld(self)._compute_dtype_object):
--> 187     outputs = ag__.converted_call(ag__.ld(call_fn), (ag__.ld(inputs),) + tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope)
    189 def get_state_8():
    190     return ()

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py:162, in outer_factory.<locals>.inner_factory.<locals>.tf__inject_argument_info_in_traceback.<locals>.error_handler(*args, **kwargs)
    160     new_e = ag__.Undefined('new_e')
    161     ag__.if_stmt(ag__.ld(arguments_context), if_body_5, else_body_5, get_state_6, set_state_6, ('new_e', 'arguments_context'), 1)
--> 162     raise ag__.converted_call(ag__.ld(new_e).with_traceback, (ag__.ld(e).__traceback__,), None, fscope_1) from None
    163 finally:
    164     signature = ag__.Undefined('signature')

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py:34, in outer_factory.<locals>.inner_factory.<locals>.tf__inject_argument_info_in_traceback.<locals>.error_handler(*args, **kwargs)
     32 try:
     33     do_return_1 = True
---> 34     retval__1 = ag__.converted_call(ag__.ld(fn), tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope_1)
     35 except:
     36     do_return_1 = False

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filez_q7mnw_.py:218, in outer_factory.<locals>.inner_factory.<locals>.tf__call(self, text, text_pair, padding, truncation, max_length, pad_to_multiple_of, return_token_type_ids, return_attention_mask)
    216     pass
    217 ag__.if_stmt(ag__.ld(text).shape.rank == 2, if_body_13, else_body_13, get_state_13, set_state_13, ('text', 'text_pair'), 2)
--> 218 text = ag__.converted_call(ag__.ld(self).unpaired_tokenize, (ag__.ld(text),), None, fscope)
    220 def get_state_16():
    221     return (input_ids, token_type_ids, text, text_pair)

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file1sz285hp.py:26, in outer_factory.<locals>.inner_factory.<locals>.tf__unpaired_tokenize(self, texts)
     24     pass
     25 ag__.if_stmt(ag__.ld(self).do_lower_case, if_body, else_body, get_state, set_state, ('texts',), 1)
---> 26 tokens = ag__.converted_call(ag__.ld(self).tf_tokenizer.tokenize, (ag__.ld(texts),), None, fscope)
     27 try:
     28     do_return = True

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filetu0cba57.py:29, in outer_factory.<locals>.inner_factory.<locals>.tf__tokenize(self, text_input)
     27 do_return = False
     28 retval_ = ag__.UndefinedReturnValue()
---> 29 normalized_input = ag__.converted_call(ag__.ld(self)._fast_bert_normalizer.normalize, (ag__.ld(text_input),), None, fscope)
     30 try:
     31     do_return = True

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file3t60g1pt.py:26, in outer_factory.<locals>.inner_factory.<locals>.tf__normalize(self, input)
     24 do_return = False
     25 retval_ = ag__.UndefinedReturnValue()
---> 26 (normalized_texts, _) = ag__.converted_call(ag__.ld(self)._normalize_with_offsets_helper, (ag__.ld(input),), dict(get_offsets=False), fscope)
     27 try:
     28     do_return = True

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py:158, in outer_factory.<locals>.inner_factory.<locals>.tf___normalize_with_offsets_helper(self, input, get_offsets)
    156     normalized_texts = ag__.Undefined('normalized_texts')
    157     row_splits = ag__.Undefined('row_splits')
--> 158     ag__.if_stmt(ag__.ld(rank) == 0, if_body_6, else_body_6, get_state_6, set_state_6, ('do_return', 'retval_', 'input'), 2)
    159 return fscope.ret(retval_, do_return)

File /var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py:53, in outer_factory.<locals>.inner_factory.<locals>.tf___normalize_with_offsets_helper.<locals>.if_body_6()
     51 try:
     52     do_return = True
---> 53     retval_ = (ag__.ld(normalized_texts).values, ag__.if_exp(ag__.ld(get_offsets), lambda : ag__.ld(offsets).values, lambda : None, 'get_offsets'))
     54 except:
     55     do_return = False

AttributeError: in user code:

    File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/keras/src/engine/training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filel3wen_qt.py", line 11, in tf__call
        context = ag__.converted_call(ag__.ld(self).encoder, (ag__.ld(bill_text),), None, fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileexf83jen.py", line 10, in tf__call
        x = ag__.converted_call(ag__.ld(self).tokenizer, (ag__.ld(input),), None, fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py", line 44, in tf__error_handler
        ag__.if_stmt(ag__.not_(ag__.converted_call(ag__.ld(tf).debugging.is_traceback_filtering_enabled, (), None, fscope)), if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py", line 40, in else_body
        raise ag__.converted_call(ag__.ld(e).with_traceback, (ag__.ld(filtered_tb),), None, fscope) from None
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py", line 34, in else_body
        retval_ = ag__.converted_call(ag__.ld(fn), tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileismqzjpj.py", line 242, in tf____call__
        ag__.if_stmt(ag__.converted_call(ag__.ld(_in_functional_construction_mode), (ag__.ld(self), ag__.ld(inputs), ag__.ld(args), ag__.ld(kwargs), ag__.ld(input_list)), None, fscope), if_body_11, else_body_11, get_state_11, set_state_11, ('do_return', "kwargs['mask']", 'retval_', 'args', 'input_list', 'inputs', 'kwargs'), 3)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileismqzjpj.py", line 187, in else_body_11
        outputs = ag__.converted_call(ag__.ld(call_fn), (ag__.ld(inputs),) + tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py", line 162, in error_handler
        raise ag__.converted_call(ag__.ld(new_e).with_traceback, (ag__.ld(e).__traceback__,), None, fscope_1) from None
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py", line 34, in error_handler
        retval__1 = ag__.converted_call(ag__.ld(fn), tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope_1)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filez_q7mnw_.py", line 218, in tf__call
        text = ag__.converted_call(ag__.ld(self).unpaired_tokenize, (ag__.ld(text),), None, fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file1sz285hp.py", line 26, in tf__unpaired_tokenize
        tokens = ag__.converted_call(ag__.ld(self).tf_tokenizer.tokenize, (ag__.ld(texts),), None, fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filetu0cba57.py", line 29, in tf__tokenize
        normalized_input = ag__.converted_call(ag__.ld(self)._fast_bert_normalizer.normalize, (ag__.ld(text_input),), None, fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file3t60g1pt.py", line 26, in tf__normalize
        (normalized_texts, _) = ag__.converted_call(ag__.ld(self)._normalize_with_offsets_helper, (ag__.ld(input),), dict(get_offsets=False), fscope)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py", line 158, in tf___normalize_with_offsets_helper
        ag__.if_stmt(ag__.ld(rank) == 0, if_body_6, else_body_6, get_state_6, set_state_6, ('do_return', 'retval_', 'input'), 2)
    File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py", line 53, in if_body_6
        retval_ = (ag__.ld(normalized_texts).values, ag__.if_exp(ag__.ld(get_offsets), lambda : ag__.ld(offsets).values, lambda : None, 'get_offsets'))

    AttributeError: Exception encountered when calling layer 'my_custom_model_2' (type MyCustomModel).
    
    in user code:
    
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/ipykernel_83321/3228993956.py", line 31, in call  *
            context = self.encoder(bill_text)
        File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileexf83jen.py", line 10, in tf__call
            x = ag__.converted_call(ag__.ld(self).tokenizer, (ag__.ld(input),), None, fscope)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py", line 44, in tf__error_handler
            ag__.if_stmt(ag__.not_(ag__.converted_call(ag__.ld(tf).debugging.is_traceback_filtering_enabled, (), None, fscope)), if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py", line 40, in else_body
            raise ag__.converted_call(ag__.ld(e).with_traceback, (ag__.ld(filtered_tb),), None, fscope) from None
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileixkodvsj.py", line 34, in else_body
            retval_ = ag__.converted_call(ag__.ld(fn), tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileismqzjpj.py", line 242, in tf____call__
            ag__.if_stmt(ag__.converted_call(ag__.ld(_in_functional_construction_mode), (ag__.ld(self), ag__.ld(inputs), ag__.ld(args), ag__.ld(kwargs), ag__.ld(input_list)), None, fscope), if_body_11, else_body_11, get_state_11, set_state_11, ('do_return', "kwargs['mask']", 'retval_', 'args', 'input_list', 'inputs', 'kwargs'), 3)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_fileismqzjpj.py", line 187, in else_body_11
            outputs = ag__.converted_call(ag__.ld(call_fn), (ag__.ld(inputs),) + tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py", line 162, in error_handler
            raise ag__.converted_call(ag__.ld(new_e).with_traceback, (ag__.ld(e).__traceback__,), None, fscope_1) from None
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py", line 34, in error_handler
            retval__1 = ag__.converted_call(ag__.ld(fn), tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope_1)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filez_q7mnw_.py", line 218, in tf__call
            text = ag__.converted_call(ag__.ld(self).unpaired_tokenize, (ag__.ld(text),), None, fscope)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file1sz285hp.py", line 26, in tf__unpaired_tokenize
            tokens = ag__.converted_call(ag__.ld(self).tf_tokenizer.tokenize, (ag__.ld(texts),), None, fscope)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filetu0cba57.py", line 29, in tf__tokenize
            normalized_input = ag__.converted_call(ag__.ld(self)._fast_bert_normalizer.normalize, (ag__.ld(text_input),), None, fscope)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file3t60g1pt.py", line 26, in tf__normalize
            (normalized_texts, _) = ag__.converted_call(ag__.ld(self)._normalize_with_offsets_helper, (ag__.ld(input),), dict(get_offsets=False), fscope)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py", line 158, in tf___normalize_with_offsets_helper
            ag__.if_stmt(ag__.ld(rank) == 0, if_body_6, else_body_6, get_state_6, set_state_6, ('do_return', 'retval_', 'input'), 2)
        File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py", line 53, in if_body_6
            retval_ = (ag__.ld(normalized_texts).values, ag__.if_exp(ag__.ld(get_offsets), lambda : ag__.ld(offsets).values, lambda : None, 'get_offsets'))
    
        AttributeError: Exception encountered when calling layer 'bert_encoder_2' (type BertEncoder).
        
        in user code:
        
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/ipykernel_83321/3228993956.py", line 10, in call  *
                x = self.tokenizer(input)
            File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/tf_keras/src/engine/base_layer.py", line 998, in error_handler  *
                return fn(*args, **kwargs)
            File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__  *
                outputs = call_fn(inputs, *args, **kwargs)
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py", line 162, in error_handler  **
                raise ag__.converted_call(ag__.ld(new_e).with_traceback, (ag__.ld(e).__traceback__,), None, fscope_1) from None
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filebwu5sw92.py", line 34, in error_handler
                retval__1 = ag__.converted_call(ag__.ld(fn), tuple(ag__.ld(args)), dict(**ag__.ld(kwargs)), fscope_1)
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filez_q7mnw_.py", line 218, in tf__call  **
                text = ag__.converted_call(ag__.ld(self).unpaired_tokenize, (ag__.ld(text),), None, fscope)
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file1sz285hp.py", line 26, in tf__unpaired_tokenize  **
                tokens = ag__.converted_call(ag__.ld(self).tf_tokenizer.tokenize, (ag__.ld(texts),), None, fscope)
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_filetu0cba57.py", line 29, in tf__tokenize  **
                normalized_input = ag__.converted_call(ag__.ld(self)._fast_bert_normalizer.normalize, (ag__.ld(text_input),), None, fscope)
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file3t60g1pt.py", line 26, in tf__normalize  **
                (normalized_texts, _) = ag__.converted_call(ag__.ld(self)._normalize_with_offsets_helper, (ag__.ld(input),), dict(get_offsets=False), fscope)
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py", line 158, in tf___normalize_with_offsets_helper  **
                ag__.if_stmt(ag__.ld(rank) == 0, if_body_6, else_body_6, get_state_6, set_state_6, ('do_return', 'retval_', 'input'), 2)
            File "/var/folders/tt/ptdq0zld5qvg2z3pqs7lctf00000gn/T/__autograph_generated_file993afsy6.py", line 53, in if_body_6
                retval_ = (ag__.ld(normalized_texts).values, ag__.if_exp(ag__.ld(get_offsets), lambda : ag__.ld(offsets).values, lambda : None, 'get_offsets'))
        
            AttributeError: Exception encountered when calling layer 'tf_bert_tokenizer_3' (type TFBertTokenizer).
            
            in user code:
            
                File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/transformers/models/bert/tokenization_bert_tf.py", line 214, in call  *
                    text = self.unpaired_tokenize(text)
                File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/transformers/models/bert/tokenization_bert_tf.py", line 172, in unpaired_tokenize  *
                    tokens = self.tf_tokenizer.tokenize(texts)
                File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/tensorflow_text/python/ops/fast_bert_tokenizer.py", line 159, in tokenize  *
                    normalized_input = self._fast_bert_normalizer.normalize(text_input)
                File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/tensorflow_text/python/ops/fast_bert_normalizer.py", line 93, in normalize  *
                    normalized_texts, _ = self._normalize_with_offsets_helper(
                File "/Users/tomi/miniconda3/envs/WeThePeople/lib/python3.10/site-packages/tensorflow_text/python/ops/fast_bert_normalizer.py", line 157, in _normalize_with_offsets_helper  *
                    offsets.values if get_offsets else None)
            
                AttributeError: 'SymbolicTensor' object has no attribute 'values'
            
            
            Call arguments received by layer 'tf_bert_tokenizer_3' (type TFBertTokenizer):
              • text=tf.Tensor(shape=(), dtype=string)
              • text_pair=None
              • padding=None
              • truncation=None
              • max_length=None
              • pad_to_multiple_of=None
              • return_token_type_ids=None
              • return_attention_mask=None
        
        
        Call arguments received by layer 'bert_encoder_2' (type BertEncoder):
          • input=tf.Tensor(shape=(), dtype=string)
    
    
    Call arguments received by layer 'my_custom_model_2' (type MyCustomModel):
      • input=('tf.Tensor(shape=(), dtype=string)', 'tf.Tensor(shape=<unknown>, dtype=int64)')

Could anyone explain what’s going on/how to fix? Does TFBertTokenizer even work with tf.tensors at all?

I think inputs to the tokenizer should be either a string or a list of strings. Supplying a tf.string tensor instead of a Python string might be the cause. You can get TensorFlow tensors back from the tokenizer itself with return_tensors="tf".

Yes, I know that supplying a tf.string tensor is the issue. It just seems to me that supplying a tf.DType to TFBertTokenizer (i.e., a tokenizer built to work with TensorFlow) should “just work.” I think this is a feature request more than anything.

It doesn’t surprise me, though. HF Transformers is fully compatible with PyTorch, but not with TensorFlow, so many of the things you would expect to work just don’t.