class bert_cls**(BertPreTrainedModel)**:
def init(self, config):
super(bert_cls, self).init(config)
self.num_labels = config.num_labels
self.config = config
self.pre_trained_model = BertModel(config)
cls_dropout = (config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob)
self.ccto_dropout = nn.Dropout(cls_dropout)
self.ccto_cls = nn.Linear(config.hidden_size, config.num_labels)
self.switch_dropout = nn.Dropout(cls_dropout)
self.switch_cls = nn.Linear(config.hidden_size, config.num_labels)
def forward(self, input_ids, mask_ids, segment_ids, ccto_label, switch_label):
outputs = self.pre_trained_model(
input_ids=input_ids,
attention_mask=mask_ids,
token_type_ids=segment_ids)
pooled_output = outputs.pooler_output
ccto_logits = self.ccto_cls(self.ccto_dropout(pooled_output))
switch_logits = self.switch_cls(self.switch_dropout(pooled_output))
ccto_loss, switch_loss = None, None
if ccto_label is not None and switch_label is not None:
loss_fct = nn.CrossEntropyLoss()
ccto_loss = loss_fct(ccto_logits, ccto_label.view(-1))
switch_loss = loss_fct(switch_logits, switch_label.view(-1))
return ccto_loss, switch_loss, ccto_logits, switch_logits
else:
return ccto_logits, switch_logits
The above function can be called by:
tokenizer = BertTokenizer.from_pretrained(‘bert-base-cased’)
config = BertConfig.from_pretrained(‘bert-base-cased’)
config.num_labels = 2
classifiers_model = bert_cls(config)
which works, also work if we change ‘Bert*’ to ‘Roberta’. However, it does not work in the following code:
class binary_cls**(PreTrainedModel):**
def init(self, config):
super(binary_cls, self).init(config)
self.num_labels = config.num_labels
self.config = config
**if args.model_name == 'BERT':**
** self.pre_trained_model = BertModel(config)**
** elif args.model_name == ‘RoBERTa’:**
** self.pre_trained_model = RobertaModel(config)**
cls_dropout = (config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob)
self.ccto_dropout = nn.Dropout(cls_dropout)
self.ccto_cls = nn.Linear(config.hidden_size, config.num_labels)
self.switch_dropout = nn.Dropout(cls_dropout)
self.switch_cls = nn.Linear(config.hidden_size, config.num_labels)
def forward(self, input_ids, mask_ids, segment_ids, ccto_label, switch_label):
outputs = self.pre_trained_model(
input_ids=input_ids,
attention_mask=mask_ids,
token_type_ids=segment_ids)
pooled_output = outputs.pooler_output
ccto_logits = self.ccto_cls(self.ccto_dropout(pooled_output))
switch_logits = self.switch_cls(self.switch_dropout(pooled_output))
ccto_loss, switch_loss = None, None
if ccto_label is not None and switch_label is not None:
loss_fct = nn.CrossEntropyLoss()
ccto_loss = loss_fct(ccto_logits, ccto_label.view(-1))
switch_loss = loss_fct(switch_logits, switch_label.view(-1))
return ccto_loss, switch_loss, ccto_logits, switch_logits
else:
return ccto_logits, switch_logits
It has the error:
config, model_kwargs = cls.config_class.from_pretrained(
AttributeError: ‘NoneType’ object has no attribute 'from_pretrained’