I added some layers to a pretrained model and trained it. However, when I tried to push it to the Hugging Face Hub by calling push_to_hub(), I got the error shown in the title. How can I fix it?
import torch.nn as nn
class CustomQwen3Model(nn.Module):
    """Wrap a pretrained model and project its logits to a smaller output size.

    The wrapped model's ``logits`` are passed through SiLU and then through a
    linear layer mapping ``vocab_size`` features to ``num_outputs`` features.

    NOTE(review): this class derives only from ``nn.Module``, so it defines no
    ``push_to_hub`` of its own. Uploading it presumably needs a Hub-aware base
    or mixin (or uploading the saved weights separately) -- confirm against
    the Hugging Face Hub documentation.

    Args:
        pretrained_model: Callable/module invoked as
            ``pretrained_model(input_ids=..., attention_mask=...)`` whose
            result exposes a ``logits`` attribute.
        vocab_size: Size of the last dimension of ``logits``; input width of
            the added linear layer. Defaults to 151936.
        num_outputs: Output width of the added linear layer. Defaults to 22.
    """

    def __init__(self, pretrained_model, *, vocab_size=151936, num_outputs=22):
        super().__init__()
        # Attribute names are kept stable so existing state-dict keys
        # (``base_model.*``, ``additional_layer.*``) still load.
        self.base_model = pretrained_model
        self.act_fn = nn.SiLU()
        self.additional_layer = nn.Linear(vocab_size, num_outputs)

    def forward(self, input_ids, attention_mask=None):
        """Run the wrapped model and return the projected, activated logits.

        Args:
            input_ids: Forwarded unchanged to the wrapped model.
            attention_mask: Forwarded unchanged to the wrapped model.

        Returns:
            The output of ``additional_layer(act_fn(logits))``; its last
            dimension has size ``num_outputs``.
        """
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        activated = self.act_fn(outputs.logits)
        return self.additional_layer(activated)
# Wrap the already-loaded Qwen model with the extra activation + linear head.
my_model = CustomQwen3Model(pretrained_model=qwen_model)
Below are my training arguments:
# Training configuration; keyword arguments are grouped by purpose.
training_args = TrainingArguments(
    output_dir=output_dir,
    # Optimisation hyperparameters.
    num_train_epochs=20,
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=True,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluation and checkpointing both use the "epoch" strategy.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    # load_best_model_at_end=True,
    save_safetensors=False,
    # Label field name(s) handed to the trainer.
    label_names=["label"],
    # Uploading to the Hub during training is switched off.
    push_to_hub=False,
)