Hi
I’m a student, new to this topic in general and that’s my first time using Hugging Face.
I want to fine-tune ViT with my own dataset, In order to do so, I’m following this tutorial, but since I don’t import my data set from datasets, some things are different.
This is my custom data set class:
import os
import torch
import numpy as np
from transformers import ViTImageProcessor
from torchvision import transforms
from PIL import Image
model_name_or_path = ‘google/vit-base-patch16-224-in21k’
feature_extractor = ViTImageProcessor.from_pretrained(model_name_or_path)
class ViTCustomDataSet():
def __init__(self, image_dir, device):
# super(ImageDataset, self).__init__()
self.image_dir = image_dir
self.transform = transforms.Compose([
transforms.ToPILImage(),
self.extract_features
])
self.image_dir = image_dir
self.device = device
def extract_features(self, image):
return feature_extractor(image, return_tensors='pt')
return feature_extractor(image, return_tensors='pt')['pixel_values']
def __len__(self):
return len(os.listdir(self.image_dir))
def __getitem__(self, idx):
image_name = os.listdir(self.image_dir)[idx]
image_path = os.path.join(self.image_dir, image_name)
image = np.array(Image.open(image_path).convert('RGB'))
image = self.transform(image)
label = self.get_label_from_filename(image_name)
return image.to(self.device), label
def get_label_from_filename(self, image_name):
return 1 if image_name.split('_')[-2] == 'M' else 0
def get_train_and_test_data(self):
train_size = int(0.8 * self.__len__())
test_size = self.__len__() - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
self, [train_size, test_size])
return torch.utils.data.DataLoader(
train_dataset, batch_size=32, shuffle=True).dataset, torch.utils.data.DataLoader(
test_dataset, batch_size=32, shuffle=True).dataset
And that’s my main:
training_args = TrainingArguments(
output_dir="./vit",
# per_device_train_batch_size=16,
evaluation_strategy="steps",
num_train_epochs=4,
# fp16=True,
save_steps=100,
eval_steps=100,
logging_steps=10,
learning_rate=2e-4,
save_total_limit=2,
remove_unused_columns=False,
push_to_hub=False,
report_to='tensorboard',
load_best_model_at_end=True,
)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
vitData = ViTCustomDataSet(
'D:/data/NIST302/images/auxiliary/flat/M/500/plain/png/equal', device=device)
train_dataset, test_dataset = vitData.get_train_and_test_data()
model = ViTForImageClassification.from_pretrained(
model_name_or_path,
num_labels=num_classes)
feature_extractor = ViTImageProcessor.from_pretrained(
model_name_or_path, padding=False)
trainer = Trainer(
model=model,
args=training_args,
compute_metrics=compute_metrics,
train_dataset=train_dataset,
eval_dataset=test_dataset,
data_collator=None,
tokenizer=feature_extractor,
)
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()
metrics = trainer.evaluate(test_dataset)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
(I’m sorry for the mess, I’m transforming an old code of mine and I want to make sure things work before I refactor)
Anyway, I’m getting the following error:
ViTImageProcessor does not have a pad attribute
Which I don’t see how to solve.
Also - since I’m new, any other tip will be welcomed!
Thank you