GitHub issue: Discrepancy in Model Inference: Local vs. Hugging Face Model Hub (huggingface/transformers #25362)
System Info
- `transformers` version: 4.31.0
- Platform: Linux-5.15.109+-x86_64-with-glibc2.35
- Python version: 3.10.12
- Huggingface_hub version: 0.16.4
- Safetensors version: 0.3.1
- Accelerate version: 0.21.0
- Accelerate config: not found
- PyTorch version (GPU?): 2.0.1+cu118 (True)
- Tensorflow version (GPU?): 2.12.0 (True)
- Flax version (CPU?/GPU?/TPU?): 0.7.0 (gpu)
- Jax version: 0.4.13
- JaxLib version: 0.4.13
- Using GPU in script?: yes
- Using distributed or parallel set-up in script?: no
Who can help?
@ArthurZucker @youne
Information
- The official example scripts
- My own modified scripts
Tasks
- An officially supported task in the examples folder (such as GLUE/SQuAD, …)
- My own task or dataset (give details below)
Reproduction
1. Train a model with the following script:
```python
from transformers import RobertaConfig, RobertaModel, RobertaTokenizer
import torch
import torch.nn.functional as F

class RobertaForRegression(RobertaModel):
    def __init__(self, config: RobertaConfig):
        super().__init__(config)
        # Regression head: four linear layers, each halving the width.
        self.regressor = torch.nn.Linear(config.hidden_size, int(config.hidden_size / 2))
        self.regressor2 = torch.nn.Linear(int(config.hidden_size / 2), int(config.hidden_size / 4))
        self.regressor3 = torch.nn.Linear(int(config.hidden_size / 4), int(config.hidden_size / 8))
        self.regressor4 = torch.nn.Linear(int(config.hidden_size / 8), 1)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = super().forward(input_ids=input_ids, attention_mask=attention_mask)
        # Use the first-token ([CLS]-style) representation as the pooled feature.
        regression_output = F.relu(self.regressor(outputs.last_hidden_state[:, 0].squeeze()))
        regression_output = F.relu(self.regressor2(regression_output))
        regression_output = F.relu(self.regressor3(regression_output))
        regression_output = self.regressor4(regression_output)
        return regression_output
```
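As an extra check (a sketch, not part of the run above): since `RobertaForRegression` subclasses `RobertaModel`, it is worth confirming which checkpoint keys `from_pretrained` actually matches when loading from the Hub. The standard `output_loading_info=True` flag reports this; `"repo"` is a placeholder for the real repo id.

```python
# Sketch: inspect which weights from_pretrained actually loaded.
# Any regressor* layers under "missing_keys" were randomly re-initialized.
loaded, loading_info = RobertaForRegression.from_pretrained("repo", output_loading_info=True)
print("missing keys (newly initialized):", loading_info["missing_keys"])
print("unexpected keys (ignored):", loading_info["unexpected_keys"])
```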
```python
# Now, let's load the pre-trained model from the repository
config = RobertaConfig.from_pretrained("repo")
frozen_base_model = RobertaForRegression.from_pretrained("repo", config=config)

# Freeze all the parameters in the base model
for param in frozen_base_model.base_model.parameters():
    param.requires_grad = False

# Ensure the parameters in the regression head are trainable
for param in frozen_base_model.regressor.parameters():
    param.requires_grad = True
# for param in frozen_base_model.regressor2.parameters():
#     param.requires_grad = True
# for param in frozen_base_model.regressor3.parameters():
#     param.requires_grad = True
# for param in frozen_base_model.regressor4.parameters():
#     param.requires_grad = True

unfrozen_base_model = RobertaForRegression.from_pretrained("repo", config=config)
tokenizer = RobertaTokenizer.from_pretrained("repo")

# Replace the base RoBERTa model in RobertaForRegression with the pre-trained model
model = RobertaForRegression(config)
```
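Side note (a quick sketch, assuming the setup above): counting trainable parameters confirms whether the freeze actually took effect before training starts.

```python
# Sketch: confirm that only the regression head is trainable.
trainable = sum(p.numel() for p in frozen_base_model.parameters() if p.requires_grad)
total = sum(p.numel() for p in frozen_base_model.parameters())
print(f"trainable parameters: {trainable:,} / {total:,}")
```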
```python
from transformers import TrainerCallback

class PushToHubCallback(TrainerCallback):
    def __init__(self, trainer, model_name):
        super().__init__()
        self.trainer = trainer
        self.model_name = model_name

    def on_epoch_end(self, args, state, control, **kwargs):
        print("saving model to {}".format("repo"))
        self.trainer.model.push_to_hub("repo", use_auth_token=True)
        # Sanity check: re-download the model and compare every tensor
        # against the local state dict.
        model_weights = self.trainer.model.state_dict()
        uploaded_model = RobertaForRegression.from_pretrained("repo", use_auth_token=True)
        uploaded_model_weights = uploaded_model.state_dict()
        for (name1, tensor1), (name2, tensor2) in zip(model_weights.items(), uploaded_model_weights.items()):
            try:
                assert name1 == name2, f"Name mismatch: {name1} vs. {name2}"
                assert torch.equal(tensor1.cpu(), tensor2.cpu()), f"Tensor mismatch for {name1}"
            except AssertionError as e:
                print(e)
```
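One caveat with the tensor comparison in the callback (a sketch of an extra check that could go right before the loop): `zip()` silently stops at the shorter of the two state dicts, so a key that exists on only one side would never be flagged. Comparing the key sets explicitly avoids that.

```python
# Sketch: compare key sets first, since zip() truncates to the shorter dict.
local_keys = set(model_weights.keys())
remote_keys = set(uploaded_model_weights.keys())
print("keys only in the local model:", sorted(local_keys - remote_keys))
print("keys only in the Hub download:", sorted(remote_keys - local_keys))
```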
```python
from transformers import Trainer, TrainingArguments, set_seed
import wandb
import torch

# Set the seed value for reproducibility
set_seed(38)

class RegressionTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.squeeze()
        input_ids = inputs.get("input_ids", None)
        if input_ids is not None:
            original_texts = [tokenizer.decode(seq, skip_special_tokens=True) for seq in input_ids]
            print("Original sequences: ", original_texts)
        print("predictions: ", logits)
        print("targets: ", labels)
        loss = torch.nn.MSELoss()(logits, labels)
        return (loss, outputs) if return_outputs else loss

model_params = [frozen_base_model, unfrozen_base_model]
for model_param in model_params:
    model.roberta = model_param
    for weight_decay in [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1]:
        for warmup_steps in [0, 1000, 5000, 10000, 15000, 20000]:
            if model_param == frozen_base_model:
                model_name = "model_name"
            elif model_param == unfrozen_base_model:
                model_name = "model_name"
            wandb.init(entity="name", project="proj", name=model_name)

            # Now, let's set up the TrainingArguments and the RegressionTrainer.
            training_args = TrainingArguments(
                output_dir="./LM",                # output directory for model predictions and checkpoints
                overwrite_output_dir=True,
                num_train_epochs=10,              # total number of training epochs
                per_device_train_batch_size=16,   # batch size per device during training
                per_device_eval_batch_size=64,    # batch size for evaluation
                warmup_steps=warmup_steps,        # number of warmup steps for the learning rate scheduler
                weight_decay=weight_decay,        # strength of weight decay
                logging_dir="./logs",             # directory for storing logs
                logging_steps=10,                 # how often to print a log line
                evaluation_strategy="steps",
                report_to="wandb",
                save_total_limit=2,
                hub_private_repo=True,
            )
            trainer = RegressionTrainer(
                model=model,                  # the instantiated 🤗 Transformers model to be trained
                args=training_args,           # training arguments, defined above
                train_dataset=train_dataset,  # training dataset
                eval_dataset=eval_dataset,    # evaluation dataset
                # data_collator=data_collator
            )
            trainer.add_callback(PushToHubCallback(trainer, model_name))
            trainer.train()
            wandb.finish()
```
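One way to rule the Hub round trip in or out would be to snapshot the trained weights locally right after training finishes (a sketch; the directory name is arbitrary), so the local copy can later be diffed against the Hub download.

```python
# Sketch: keep a local snapshot of exactly what was trained.
trainer.model.save_pretrained("./local_checkpoint")
tokenizer.save_pretrained("./local_checkpoint")
```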
2. Download the model from the Hub and test inference with:
```python
downloaded_model = RobertaForRegression.from_pretrained("repo", use_auth_token=True)

target = "known value"
sequence = "..."  # placeholder: the same evaluation input sequence used during training
encoded_sequence = tokenizer.encode_plus(
    sequence,
    truncation=True,
    padding="max_length",
    max_length=128,
    return_tensors="pt",
)

# Forward pass
downloaded_model.to("cuda")  # move the model to the GPU
downloaded_model.eval()      # put the model in evaluation mode
with torch.no_grad():        # deactivate autograd: less memory, faster inference
    outputs = downloaded_model(
        input_ids=encoded_sequence["input_ids"].to("cuda"),
        attention_mask=encoded_sequence["attention_mask"].to("cuda"),
    )
predicted = outputs  # the raw regression output tensor
print(f"Predicted: {predicted}")
print(target)
```
3. Run inference with the locally trained weights (no download) to compare against the Hub copy:
```python
target = "known value"
encoded_sequence = tokenizer.encode_plus(
    sequence,
    truncation=True,
    padding="max_length",
    max_length=128,
    return_tensors="pt",
)

# Forward pass (the model is still on the GPU from training)
model.eval()           # put the model in evaluation mode
with torch.no_grad():  # deactivate autograd: less memory, faster inference
    outputs = model(
        input_ids=encoded_sequence["input_ids"].to("cuda"),
        attention_mask=encoded_sequence["attention_mask"].to("cuda"),
    )
predicted = outputs  # the raw regression output tensor
print(f"Predicted: {predicted}")
print(target)
```
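To compare the two models more directly than eyeballing predictions, a cheap parameter fingerprint can be computed for each (`param_fingerprint` is a helper made up for this sketch, not part of the script above):

```python
# Sketch: a cheap fingerprint over all parameters of a model.
def param_fingerprint(m):
    return sum(p.detach().float().abs().sum().item() for p in m.parameters())

print("downloaded model:", param_fingerprint(downloaded_model))
print("local model:", param_fingerprint(model))
```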
PROBLEM: steps 2 and 3 produce completely different values, even though the input sequence is exactly the same. Note: step 3 (using the local weights without downloading) gives a result closely aligned with the training-run outputs, which is good; the downloaded model gives poor predictions.
Expected behavior
I would expect that running inference on the downloaded model weights gives the same (or very similar) results as running inference on the initially trained model locally. I already verify, by re-downloading the model in the callback, that the uploaded weights match the local weights. What could possibly be the issue? I've spent hours racking my brain over this!
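A minimal end-to-end version of that check (a sketch, assuming both models and the encoded batch from the steps above are still in scope) would be to assert the two models agree on a single input:

```python
# Sketch: check that local and downloaded models agree on one batch.
inputs = {k: v.to("cuda") for k, v in encoded_sequence.items()}
model.eval().to("cuda")
downloaded_model.eval().to("cuda")
with torch.no_grad():
    local_out = model(**inputs)
    remote_out = downloaded_model(**inputs)
print("outputs allclose:", torch.allclose(local_out, remote_out, atol=1e-5))
```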
Thanks!