Getting an error while adding new tokens to the vocab

I am using a custom model:

import torch.nn as nn
from transformers import AutoModel

class BertMultiClassifier(nn.Module):
    def __init__(self, bert_model_path, labels_count, hidden_dim=768, dropout=0.1):
        super().__init__()

        self.config = {
            'bert_model_path': bert_model_path,
            'labels_count': labels_count,
            'hidden_dim': hidden_dim,
            'dropout': dropout,
        }

        self.bert = AutoModel.from_pretrained(bert_model_path)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_dim, labels_count)
        # self.linear = nn.Linear(hidden_dim, 1)
        # self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=-1)  # softmax over the label dimension

    def forward(self, input_ids, attention_mask):
        o = self.bert(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True, return_dict=True)
        pooled_output = o['pooler_output']
        last_hidden_state = o['last_hidden_state']
        hidden_states = o['hidden_states']

        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        # proba = self.sigmoid(linear_output)
        proba = self.softmax(linear_output)
        return proba, last_hidden_state, hidden_states
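
For reference, here is roughly how I build the tokenizer and the model (the checkpoint name and label count below are placeholders, just for illustration):

from transformers import AutoTokenizer

# placeholder checkpoint and label count
bert_model_path = 'bert-base-uncased'
labels_count = 3

tokenizer = AutoTokenizer.from_pretrained(bert_model_path)
model = BertMultiClassifier(bert_model_path, labels_count)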

I want to add new tokens to the vocabulary as described in this comment: https://github.com/huggingface/transformers/issues/1413#issuecomment-538083512.

But I am getting this error when I resize the token embeddings:

# Let's increase the vocabulary of the BERT model and tokenizer
num_added_tokens = tokenizer.add_tokens(new_tokens)

# only tokens not already in the initial vocabulary are added by the tokenizer.add_tokens() method

print(f'We have added {num_added_tokens} tokens')

# resize_token_embeddings expects the full size of the new vocabulary, i.e., the length of the tokenizer.
model.resize_token_embeddings(len(tokenizer))

AttributeError                            Traceback (most recent call last)
/home/guest/experiment_with_rationales/train_the_bert_with_added_vocab.ipynb Cell 38' in <cell line: 9>()
      6 print(f'We have added {num_added_tokens} tokens')
      8 # resize_token_embeddings expect to receive the full size of the new vocabulary, i.e., the length of the tokenizer.
----> 9 model.resize_token_embeddings(len(tokenizer))

File ~/anaconda3/envs/hasoc/lib/python3.10/site-packages/torch/nn/modules/module.py:1185, in Module.__getattr__(self, name)
   1183     if name in modules:
   1184         return modules[name]
-> 1185 raise AttributeError("'{}' object has no attribute '{}'".format(
   1186     type(self).__name__, name))

AttributeError: 'BertMultiClassifier' object has no attribute 'resize_token_embeddings'

I don’t see where you defined model, but from your traceback I assume you are calling resize_token_embeddings on an instance of your custom class. BertMultiClassifier is a plain nn.Module, so it does not inherit resize_token_embeddings from Hugging Face's PreTrainedModel. You should call the function on self.bert instead, since that attribute holds the Hugging Face model instance.
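
A minimal sketch of the fix, assuming the instance is named model and the added tokens live in new_tokens as in your snippet:

num_added_tokens = tokenizer.add_tokens(new_tokens)
print(f'We have added {num_added_tokens} tokens')

# Resize the embedding matrix on the wrapped Hugging Face model,
# not on the outer nn.Module wrapper.
model.bert.resize_token_embeddings(len(tokenizer))

Alternatively, you could add a small resize_token_embeddings method to BertMultiClassifier that simply forwards to self.bert, so your original call site keeps working unchanged.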

@bowenz It worked. Thanks