Hello, I need to create a custom model for my research using the Hugging Face PreTrainedModel. I was wondering what happens when I set my own dropout in __init__ but, when calling the model via .from_pretrained() (or through the model config), I also change hidden_dropout_prob and attention_probs_dropout_prob. To show what I mean, I will include a little of my code here.
This is my model, where I assign self.dropout a fixed probability of 0.5 (via config.DROPOUT):
    import torch
    import torch.nn as nn
    from transformers import AutoConfig, AutoModel, AutoTokenizer, PreTrainedModel

    import config   # project settings module (config.MODEL, config.DROPOUT = 0.5)
    import consts   # special-token constants (consts.E1_START_TOKEN, consts.E2_START_TOKEN)


    class RelationExtractionModel(PreTrainedModel):
        config_class = AutoConfig

        def __init__(self, model_config: AutoConfig, tokenizer: AutoTokenizer):
            super().__init__(model_config)
            # Note: `config` is my project settings module, not the `model_config` above.
            self.model: AutoModel = AutoModel.from_pretrained(config.MODEL, config=model_config)
            self.model.resize_token_embeddings(len(tokenizer))
            self.tokenizer = tokenizer
            # HERE: my custom dropout, fixed at config.DROPOUT = 0.5
            self.dropout = nn.Dropout(config.DROPOUT)
            #
            self.classifier = nn.Linear(model_config.hidden_size * 3, model_config.num_labels)
            self.e1_start_id = tokenizer.convert_tokens_to_ids(consts.E1_START_TOKEN)
            self.e2_start_id = tokenizer.convert_tokens_to_ids(consts.E2_START_TOKEN)
            self.cls_token_id = tokenizer.cls_token_id

        def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
            outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
            sequence_output = outputs.last_hidden_state
            # Pool the hidden states at the entity-marker and [CLS] positions
            e1_mask = (input_ids == self.e1_start_id).unsqueeze(-1).expand(sequence_output.size())
            entity_a = torch.sum(sequence_output * e1_mask, dim=1)
            e2_mask = (input_ids == self.e2_start_id).unsqueeze(-1).expand(sequence_output.size())
            entity_b = torch.sum(sequence_output * e2_mask, dim=1)
            cls_mask = (input_ids == self.cls_token_id).unsqueeze(-1).expand(sequence_output.size())
            cls_embedding = torch.sum(sequence_output * cls_mask, dim=1)
            embedding = torch.cat([entity_a, entity_b, cls_embedding], dim=1)
            # My custom dropout is only applied here, on the pooled embedding
            embedding = self.dropout(embedding)
            logits = self.classifier(embedding)
            loss = None
            if labels is not None:
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits, labels)
            return {"loss": loss, "logits": logits} if labels is not None else {"logits": logits}
I then call the model like this:
    from utils.RE_utils.CERED.RE_model import RelationExtractionModel

    model = RelationExtractionModel.from_pretrained(
        config.MODEL,
        tokenizer=tokenizer,
        num_labels=len(id2label),
        label2id=label2id,
        id2label=id2label,
        hidden_dropout_prob=0.25,
        attention_probs_dropout_prob=0.25,
    )
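The "using model config" variant from my first paragraph would look roughly like this; a sketch of how I understand the API, reusing the same names as above:

    model_config = AutoConfig.from_pretrained(
        config.MODEL,
        num_labels=len(id2label),
        label2id=label2id,
        id2label=id2label,
        hidden_dropout_prob=0.25,
        attention_probs_dropout_prob=0.25,
    )
    model = RelationExtractionModel.from_pretrained(config.MODEL, config=model_config, tokenizer=tokenizer)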
In both cases I put different values (0.5 in __init__ vs 0.25 here) on purpose, to make the difference easier to see.
My idea is that the dropout on the hidden layers and on the attention probabilities will end up changed to the dropout I assigned in __init__, but I am not sure.
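If it helps, this is how I would try to check what actually ends up in the loaded model; a quick sketch, where the attribute paths assume a BERT-style encoder sitting under self.model:

    # After the from_pretrained(...) call with the kwargs above:
    print(model.model.config.hidden_dropout_prob)            # 0.25 or 0.5?
    print(model.model.config.attention_probs_dropout_prob)   # 0.25 or 0.5?
    print(model.dropout.p)                                   # my head dropout, presumably still 0.5
    # List every dropout module inside the encoder with its probability
    for name, module in model.model.named_modules():
        if isinstance(module, nn.Dropout):
            print(name, module.p)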