I am using a custom model
class BertMultiClassifier(nn.Module):
def __init__(self, bert_model_path, labels_count, hidden_dim=768, dropout=0.1):
super().__init__()
self.config = {
'bert_model_path': bert_model_path,
'labels_count': labels_count,
'hidden_dim': hidden_dim,
'dropout': dropout,
}
self.bert = AutoModel.from_pretrained(bert_model_path)
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(hidden_dim, labels_count)
# self.linear = nn.Linear(hidden_dim, 1)
# self.sigmoid = nn.Sigmoid()
self.softmax = nn.Softmax()
def forward(self, input_ids, attention_mask):
o = self.bert(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True, return_dict=True)
pooled_output = o['pooler_output']
last_hidden_state = o['last_hidden_state']
hidden_states = o['hidden_states']
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
# proba = self.sigmoid(linear_output)
proba = self.softmax(linear_output)
return proba, last_hidden_state, hidden_states
I want to add the new tokens in the vocab as instructed in this link https://github.com/huggingface/transformers/issues/1413#issuecomment-538083512.
But I am getting this error while resizing the token embeddings
# Let's increase the vocabulary of Bert model and tokenizer
num_added_tokens = tokenizer.add_tokens(new_tokens)
# only tokens different from those of the initial vocabulary will be added by the tokenizer.add_tokens() method
print(f'We have added {num_added_tokens} tokens')
# resize_token_embeddings expect to receive the full size of the new vocabulary, i.e., the length of the tokenizer.
model.resize_token_embeddings(len(tokenizer))
AttributeError Traceback (most recent call last)
/home/guest/experiment_with_rationales/train_the_bert_with_added_vocab.ipynb Cell 38' in <cell line: 9>()
6 print(f'We have added {num_added_tokens} tokens')
8 # resize_token_embeddings expect to receive the full size of the new vocabulary, i.e., the length of the tokenizer.
----> 9 model.resize_token_embeddings(len(tokenizer))
File ~/anaconda3/envs/hasoc/lib/python3.10/site-packages/torch/nn/modules/module.py:1185, in Module.__getattr__(self, name)
1183 if name in modules:
1184 return modules[name]
-> 1185 raise AttributeError("'{}' object has no attribute '{}'".format(
1186 type(self).__name__, name))
AttributeError: 'BertMultiClassifier' object has no attribute 'resize_token_embeddings'