TypeError when loading a BERT model using TFAutoModel

When I load the bert-base-german-cased pytorch model using the following command lines:

from transformers.modeling_tf_auto import TFAutoModel
bert_model = TFAutoModel.from_pretrained('bert-base-german-cased', from_pt=True, config=self.bert_config)

or the tensorflow checkpoint:

TFAutoModel.from_pretrained('path_to_bert_base_german_cased_tensorflow_checkpoint', config=self.bert_config)

I get the following error:

   Traceback (most recent call last):
  File "train.py", line 23, in <module>
    model = util.get_model(config)
  File "/home/asi/PycharmProjects/coreference_resolution/util.py", line 21, in get_model
    return independent.CorefModel(config)
  File "/home/asi/PycharmProjects/coreference_resolution/independent.py", line 59, in __init__
    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
  File "/home/asi/PycharmProjects/coreference_resolution/independent.py", line 263, in get_predictions_and_loss
    bert_model = TFAutoModel.from_pretrained('bert-base-german-cased', from_pt=True, config=self.bert_config)
  File "/home/asi/anaconda3/envs/coref_resol/lib/python3.6/site-packages/transformers/modeling_tf_auto.py", line 423, in from_pretrained
    return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
  File "/home/asi/anaconda3/envs/coref_resol/lib/python3.6/site-packages/transformers/modeling_tf_utils.py", line 482, in from_pretrained
    return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file, allow_missing_keys=True)
  File "/home/asi/anaconda3/envs/coref_resol/lib/python3.6/site-packages/transformers/modeling_tf_pytorch_utils.py", line 93, in load_pytorch_checkpoint_in_tf2_model
    tf_model, pt_state_dict, tf_inputs=tf_inputs, allow_missing_keys=allow_missing_keys
  File "/home/asi/anaconda3/envs/coref_resol/lib/python3.6/site-packages/transformers/modeling_tf_pytorch_utils.py", line 125, in load_pytorch_weights_in_tf2_model
    tf_model(tf_inputs, training=False)  # Make sure model is built
  File "/home/asi/anaconda3/envs/coref_resol/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 854, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "/home/asi/anaconda3/envs/coref_resol/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py", line 237, in wrapper
    raise e.ag_error_metadata.to_exception(e)
TypeError: in converted code:
    relative to /home/asi/anaconda3/envs/coref_resol/lib/python3.6/site-packages:

    transformers/modeling_tf_bert.py:739 call  *
        outputs = self.bert(inputs, **kwargs)
    tensorflow_core/python/keras/engine/base_layer.py:854 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    transformers/modeling_tf_bert.py:606 call  *
        embedding_output = self.embeddings([input_ids, position_ids, token_type_ids, inputs_embeds], training=training)
    tensorflow_core/python/keras/engine/base_layer.py:894 __call__
        self._maybe_build(inputs)
    tensorflow_core/python/keras/engine/base_layer.py:2146 _maybe_build
        self.build(input_shapes)
    transformers/modeling_tf_bert.py:146 build
        initializer=get_initializer(self.initializer_range),
    tensorflow_core/python/keras/engine/base_layer.py:529 add_weight
        aggregation=aggregation)
    tensorflow_core/python/training/tracking/base.py:712 _add_variable_with_custom_getter
        **kwargs_for_getter)
    tensorflow_core/python/keras/engine/base_layer_utils.py:139 make_variable
        shape=variable_shape if variable_shape else None)
    tensorflow_core/python/ops/variables.py:258 __call__
        return cls._variable_v1_call(*args, **kwargs)
    tensorflow_core/python/ops/variables.py:219 _variable_v1_call
        shape=shape)
    tensorflow_core/python/ops/variables.py:197 <lambda>
        previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
    tensorflow_core/python/ops/variable_scope.py:2503 default_variable_creator
        shape=shape)
    tensorflow_core/python/ops/variables.py:262 __call__
        return super(VariableMetaclass, cls).__call__(*args, **kwargs)
    tensorflow_core/python/ops/resource_variable_ops.py:1406 __init__
        distribute_strategy=distribute_strategy)
    tensorflow_core/python/ops/resource_variable_ops.py:1537 _init_from_args
        initial_value() if init_from_fn else initial_value,
    tensorflow_core/python/keras/engine/base_layer_utils.py:119 <lambda>
        init_val = lambda: initializer(shape, dtype=dtype)
    tensorflow_core/python/ops/init_ops.py:369 __call__
        shape, self.mean, self.stddev, dtype, seed=self.seed)
    tensorflow_core/python/ops/random_ops.py:171 truncated_normal
        mean_tensor = ops.convert_to_tensor(mean, dtype=dtype, name="mean")
    tensorflow_core/python/framework/ops.py:1184 convert_to_tensor
        return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
    tensorflow_core/python/framework/ops.py:1242 convert_to_tensor_v2
        as_ref=False)
    tensorflow_core/python/framework/ops.py:1297 internal_convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    tensorflow_core/python/framework/tensor_conversion_registry.py:52 _default_conversion_function
        return constant_op.constant(value, dtype, name=name)
    tensorflow_core/python/framework/constant_op.py:227 constant
        allow_broadcast=True)
    tensorflow_core/python/framework/constant_op.py:265 _constant_impl
        allow_broadcast=allow_broadcast))
    tensorflow_core/python/framework/tensor_util.py:449 make_tensor_proto
        _AssertCompatible(values, dtype)
    tensorflow_core/python/framework/tensor_util.py:331 _AssertCompatible
        (dtype.name, repr(mismatch), type(mismatch).__name__))

    TypeError: Expected int32, got 0.0 of type 'float' instead.

Environment info is as follows:
transformers: 3.0.2
tensorflow and tensorflow-gpu: 1.15.2
platform: ubuntu 16.04
CUDA: V10.0.130
python: 3.6.10

A similar problem is discussed in the following link, about importing the TFDistilBert model: ImportError: cannot import name 'TFDistilBertModel' · Issue #2641 · huggingface/transformers · GitHub — but it did not solve my issue.

Any clues on how to fix this issue?

Thanks

I had the same problem - Dana