Hi everyone,
I’m encountering an issue when trying to push my ViltProcessor
to the Hugging Face Hub. Below is my code:
from PIL import Image
from datasets import load_dataset
from transformers import ViltProcessor, ViltForQuestionAnswering, TrainingArguments, Trainer, DefaultDataCollator
import torch
import itertools
OUTPUT_DIR = 'muhammadfhadli/vilt-b32-finetuned-vqa-hse-v0.0.1'
# Load and display dataset
dataset = load_dataset("json", data_files="dataset.json", split="train")
print(f"dataset: {dataset}")
# Display image size
image = Image.open(dataset[0]['image_id'])
print(f"Image size: {image.width}x{image.height}")
# Generate label mappings
labels = list(itertools.chain(*[item['ids'] for item in dataset['label']]))
unique_labels = list(set(labels))
label2id = {label: idx for idx, label in enumerate(unique_labels)}
id2label = {idx: label for label, idx in label2id.items()}
# Replace label IDs in the dataset
def replace_ids(inputs):
inputs["label"]["ids"] = [label2id[x] for x in inputs["label"]["ids"]]
return inputs
dataset = dataset.map(replace_ids)
flat_dataset = dataset.flatten()
print(flat_dataset.features)
print(f"flat_dataset: {flat_dataset}")
# Initialize processor
model_checkpoint = "dandelin/vilt-b32-finetuned-vqa"
processor = ViltProcessor.from_pretrained(model_checkpoint)
# Preprocess data
def preprocess_data(examples):
images = [Image.open(path).convert("RGB") for path in examples['image_id']]
encoding = processor(images, examples['question'], padding="max_length", truncation=True, return_tensors="pt")
# Squeeze batch dimension
for key, value in encoding.items():
encoding[key] = value.squeeze()
# Prepare target tensors
encoding["labels"] = [
torch.zeros(len(id2label)).scatter_(0, torch.tensor(labels), torch.tensor(scores))
for labels, scores in zip(examples['label.ids'], examples['label.weights'])
]
return encoding
processed_dataset = flat_dataset.map(preprocess_data, batched=True,
remove_columns=['question', 'image_id', 'label.ids', 'label.weights'])
print(f"processed_dataset: {processed_dataset}")
# Prepare for training
data_collator = DefaultDataCollator()
model = ViltForQuestionAnswering.from_pretrained(
model_checkpoint,
num_labels=len(id2label),
id2label=id2label,
label2id=label2id,
ignore_mismatched_sizes=True
)
training_args = TrainingArguments(
output_dir=OUTPUT_DIR,
per_device_train_batch_size=4,
num_train_epochs=20,
logging_steps=50,
learning_rate=5e-5,
remove_unused_columns=False,
push_to_hub=True,
hub_private_repo=True
)
# Train the model
trainer = Trainer(
model=model,
args=training_args,
data_collator=data_collator,
train_dataset=processed_dataset,
)
trainer.train()
processor.push_to_hub(OUTPUT_DIR, private=True)
trainer.push_to_hub()
When I run processor.push_to_hub
, I get the following error:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/user/.pyenv/versions/vqa-train/lib/python3.10/site-packages/transformers/utils/hub.py", line 938, in push_to_hub
model_card = create_and_tag_model_card(
File "/home/user/.pyenv/versions/vqa-train/lib/python3.10/site-packages/transformers/utils/hub.py", line 1193, in create_and_tag_model_card
model_card = ModelCard.load(repo_id, token=token, ignore_metadata_errors=ignore_metadata_errors)
File "/home/user/.pyenv/versions/vqa-train/lib/python3.10/site-packages/huggingface_hub/repocard.py", line 189, in load
with card_path.open(mode="r", newline="", encoding="utf-8") as f:
File "/home/user/.pyenv/versions/3.10.12/lib/python3.10/pathlib.py", line 1119, in open
return self._accessor.open(self, mode, buffering, encoding, errors,
IsADirectoryError: [Errno 21] Is a directory: 'muhammadfhadli/vilt-b32-finetuned-vqa-hse-v0.0.1'
I tried setting use_temp_dir=True
and use_temp_dir=False
, but it still didn’t work. It only worked after I manually deleted the directory muhammadfhadli/vilt-b32-finetuned-vqa-hse-v0.0.1
before running processor.push_to_hub(OUTPUT_DIR, private=True)
.
I’m trying similar answers from this GitHub issue, but none of them seem to work for me.
I would appreciate any insights or suggestions on how to resolve this issue.
Thanks in advance!