'BertEncoder' object has no attribute 'gradient_checkpointing'

I’m getting a strange error from code that previously worked OK. I’m only trying to use a previously trained NLP model to predict labels.

Source Code:

BATCH_SIZE = 128
MAX_LEN = 64


def _infer_device(model):
  """Return the device holding the model's parameters, with a fallback."""
  try:
    return next(model.parameters()).device
  except (AttributeError, StopIteration):
    # Model exposes no parameters; fall back to the GPU when one is present.
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def generate_labels(model, tokenizer, df, segment_label, *,
                    batch_size=BATCH_SIZE, max_len=MAX_LEN, device=None):
  """Predict a class index for every row of ``df[segment_label]``.

  Args:
    model: Callable model whose output exposes ``.logits``; it is switched
      to eval mode before inference.
    tokenizer: Callable that accepts a list of strings plus the keyword
      arguments ``return_tensors``, ``padding``, ``truncation`` and
      ``max_length`` and returns a mapping with ``'input_ids'`` and
      ``'attention_mask'`` tensors.
    df: Table-like object where ``df[segment_label].values`` yields the texts.
    segment_label: Name of the column holding the text segments.
    batch_size: Number of segments per forward pass.
    max_len: Length every segment is padded/truncated to.
    device: Device the inputs are moved to. Defaults to the device of the
      model's parameters, so inputs and weights always live together.

  Returns:
    A list with one predicted class index per input segment, in input order.

  Raises:
    ValueError: If ``batch_size`` is not positive.
  """
  if batch_size <= 0:
    raise ValueError('batch_size must be a positive integer')

  segments = list(df[segment_label].values)
  if device is None:
    device = _infer_device(model)

  # Ceiling division: ``int(n / batch_size) + 1`` would schedule an extra,
  # empty batch whenever n is an exact multiple of batch_size (or zero).
  num_batches = (len(segments) + batch_size - 1) // batch_size
  print('Total num batches: ', num_batches)

  pred_labels = []
  model.eval()
  with torch.no_grad():  # inference only, no autograd bookkeeping
    for i in range(num_batches):
      if (i + 1) % 100 == 0:
        print('Processing batch #', (i + 1))

      batch = segments[i * batch_size:(i + 1) * batch_size]
      tokenized_text = tokenizer(batch, return_tensors="pt",
                                 padding='max_length', truncation=True,
                                 max_length=max_len)
      input_ids = tokenized_text['input_ids'].to(device)
      attention_mask = tokenized_text['attention_mask'].to(device)
      outputs = model(input_ids, attention_mask=attention_mask).logits
      # Highest-scoring class per row, moved back to the host.
      pred_labels_batch = torch.argmax(outputs, dim=1).cpu().numpy()
      pred_labels.extend(pred_labels_batch)
  return pred_labels

# Run inference over the 'text' column of sent_df with the previously
# trained model; pred_labels receives the predicted class indices.
pred_labels = generate_labels(trained_model, tokenizer, sent_df, 'text')

ERROR:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<command-98410652084852> in <module>
----> 1 pred_labels = generate_labels(trained_model, tokenizer, sent_df, 'text')
      2 sent_df['PREDICTED_DISCOURSE_TAG'] = [pubmed_rct_idx_to_label[x] for x in pred_labels]

<command-4224228297735130> in generate_labels(model, tokenizer, df, segment_label)
     17       input_ids = tokenized_text['input_ids'].to('cuda')
     18       attention_mask = tokenized_text['attention_mask'].to('cuda')
---> 19       outputs = model(input_ids, attention_mask = attention_mask).logits
     20       pred_labels_batch = torch.argmax(outputs, dim = 1).cpu().numpy()
     21       pred_labels.extend(pred_labels_batch)

/databricks/python/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/databricks/python/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
   1528         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
   1529 
-> 1530         outputs = self.bert(
   1531             input_ids,
   1532             attention_mask=attention_mask,

/databricks/python/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/databricks/python/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
    994             past_key_values_length=past_key_values_length,
    995         )
--> 996         encoder_outputs = self.encoder(
    997             embedding_output,
    998             attention_mask=extended_attention_mask,

/databricks/python/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/databricks/python/lib/python3.8/site-packages/transformers/models/bert/modeling_bert.py in forward(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
    558             past_key_value = past_key_values[i] if past_key_values is not None else None
    559 
--> 560             if self.gradient_checkpointing and self.training:
    561 
    562                 if use_cache:

/databricks/python/lib/python3.8/site-packages/torch/nn/modules/module.py in __getattr__(self, name)
   1128             if name in modules:
   1129                 return modules[name]
-> 1130         raise AttributeError("'{}' object has no attribute '{}'".format(
   1131             type(self).__name__, name))
   1132 

AttributeError: 'BertEncoder' object has no attribute 'gradient_checkpointing'

No idea where this is coming from. Any ideas or help?

Note that this error goes away completely if I use transformers==4.10.0

You did not show how you create your model in your post, so no one can help you debug the problem.

From the error message, it looks like you used torch.save to save your whole model (and not just the weights). That is not recommended at all: when the model class changes (as it did between 4.10 and 4.11), you can no longer reload the saved object directly with torch.load.

Our advice is to always use save_pretrained/from_pretrained to save/load your models. If that’s not possible, save only the weights (model.state_dict()) with torch.save and then reload them with model.load_state_dict, as this will work across different versions of the models.

1 Like