I’m using the “Fine-tuning for Image Classification with Transformers” notebook on my own custom dataset, fine-tuning “apple/mobilevitv2-1.0-imagenet1k-256”. There were no errors before I ran trainer.train(). After 15 minutes without any progress bar, the bar finally appeared but showed no progress; another 7 minutes later it advanced by 1 step and reported an ETA of 60,000 hours.
Here’s my code:
# Install the pinned PyTorch build plus the Hugging Face libraries used below.
pip install torch==1.11.0
pip install -q datasets transformers
pip install fsspec==2023.9.2
# List the GPUs visible to TensorFlow.
# NOTE(review): the model below is loaded and trained through PyTorch, so this
# check does not confirm that torch itself can see the GPU.
import tensorflow as tf
tf.config.list_physical_devices('GPU')
# Shared settings read by the model-loading and TrainingArguments code.
model_checkpoint = "apple/mobilevitv2-1.0-imagenet1k-256" # pre-trained model from which to fine-tune
batch_size = 32 # batch size for training and evaluation
pip install ipywidgets
# Log in to the Hugging Face Hub from inside the notebook.
from huggingface_hub import notebook_login
notebook_login()
%%capture
!git config --global credential.helper store
# Tag this run as the image-classification notebook (PyTorch) for example telemetry.
from transformers.utils import send_example_telemetry
send_example_telemetry("image_classification_notebook", framework="pytorch")
# Display the installed `datasets` version.
import datasets
datasets.__version__
from datasets import load_dataset
# Load a custom dataset from local/remote files or folders using the ImageFolder feature.
# option 1: local/remote files (supporting the following formats: tar, gzip, zip, xz, rar, zstd)
dataset = load_dataset("imagefolder", data_files=r"Desktop/Finished.zip")
# note that you can also provide several splits:
# dataset = load_dataset("imagefolder", data_files={"train": ["path/to/file1", "path/to/file2"], "test": ["path/to/file3", "path/to/file4"]})
# note that you can push your dataset to the hub very easily (and reload afterwards using load_dataset)!
# dataset.push_to_hub("nielsr/eurosat")
# dataset.push_to_hub("nielsr/eurosat", private=True)
# option 2: local folder
# dataset = load_dataset("imagefolder", data_dir= r"Desktop/Finished.zip")
# option 3: just load any existing dataset from the hub, like CIFAR-10, FashionMNIST ...
# dataset = load_dataset("food101")
from datasets import load_metric
# `trust_remote_code` has to be passed to load_metric itself: written as a
# standalone assignment it only created an unused variable and never reached
# the metric loader.
metric = load_metric("accuracy", trust_remote_code=True)
# Display the loaded DatasetDict (splits, columns, row counts).
dataset
pip install Pillow
import PIL
# Pull a single record (index 10) from the train split and look at it.
example = dataset["train"][10]
example
# Column schema of the train split; the "label" feature is read below to get class names.
dataset["train"].features
example['image']
# Display the sample at 200x200 — presumably `example['image']` is a PIL image; verify.
example['image'].resize((200, 200))
example['label']
# Class names taken from the dataset's "label" feature; a name's position in
# this list is used as its integer id.
labels = dataset["train"].features["label"].names
# Two-way lookup between class name and integer id, handed to the model when
# it is loaded. The loop body must be indented to be valid Python.
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = i
    id2label[i] = label
# Spot-check one entry. Id 2779 only exists when the dataset has at least
# 2780 classes; otherwise this lookup raises KeyError.
id2label[2779]
from transformers import AutoImageProcessor
# Load the preprocessing configuration that ships with the checkpoint; only
# its `size` entry is used below.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
image_processor
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)
# normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
# Derive the resize and crop sizes from the processor's `size` dict, which is
# either {"height", "width"} or {"shortest_edge"[, "longest_edge"]}.
if "height" in image_processor.size:
    size = (image_processor.size["height"], image_processor.size["width"])
    crop_size = size
    max_size = None
elif "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
    crop_size = (size, size)
    max_size = image_processor.size.get("longest_edge")
else:
    # Fail here with a clear message instead of a NameError on `size` or
    # `crop_size` when the transforms are built.
    raise ValueError(
        f"Unsupported image_processor.size layout: {image_processor.size!r}"
    )
# Training pipeline: random resized crop to `crop_size`, random horizontal
# flip, then conversion to a tensor. The `normalize` step is deliberately not
# part of the pipeline.
train_transforms = Compose([RandomResizedCrop(crop_size), RandomHorizontalFlip(), ToTensor()])
# Validation pipeline: resize to `size`, center crop to `crop_size`, then
# conversion to a tensor. The `normalize` step is deliberately not part of
# the pipeline here either.
val_transforms = Compose([Resize(size), CenterCrop(crop_size), ToTensor()])
def preprocess_train(example_batch):
    """Apply train_transforms across a batch.

    Args:
        example_batch: Mapping with an "image" entry holding the batch's
            images. Each image is converted to RGB before being transformed.

    Returns:
        The same ``example_batch`` object, with a new "pixel_values" entry
        containing one transformed result per input image.
    """
    example_batch["pixel_values"] = [
        train_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch
def preprocess_val(example_batch):
    """Apply val_transforms across a batch.

    Args:
        example_batch: Mapping with an "image" entry holding the batch's
            images. Each image is converted to RGB before being transformed.

    Returns:
        The same ``example_batch`` object, with a new "pixel_values" entry
        containing one transformed result per input image.
    """
    example_batch["pixel_values"] = [
        val_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch
# Carve a 10% validation set out of the loaded "train" split.
splits = dataset["train"].train_test_split(test_size=0.1)
train_ds, val_ds = splits["train"], splits["test"]
# Attach the matching preprocessing function to each split.
train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)
# Fetch one training record to check what preprocessing produces.
train_ds[0]
import torch
# Use the first CUDA device when PyTorch can see one, otherwise the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"
print(device)
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer
# Load the pretrained checkpoint with this dataset's label maps.
# `ignore_mismatched_sizes` is set so that loading is not rejected when the
# checkpoint's weights do not match the sizes implied by those maps.
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
# Move the model to the device selected earlier.
model = model.to(device)
! pip install -U accelerate
# Output directory is named after the checkpoint, without the "apple/" prefix.
model_name = model_checkpoint.split("/")[-1]
args = TrainingArguments(
    f"{model_name}",
    # Keep the "image" column: the on-the-fly transforms read it.
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-4,
    per_device_train_batch_size=batch_size,
    # Two accumulated batches per optimizer step.
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=batch_size,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    fp16=True,
    num_train_epochs=15,
    warmup_ratio=0.1,
    # Weight decay must be non-negative: the previous value of -0.01 pushes
    # weights away from zero instead of regularising them.
    weight_decay=0.01,
    logging_strategy="steps",
    load_best_model_at_end=True,
    # "accuracy" is the key produced by compute_metrics.
    metric_for_best_model="accuracy",
    push_to_hub=False,
)
import numpy as np
def compute_metrics(eval_pred):
    """Compute accuracy on a batch of predictions.

    Args:
        eval_pred: Named tuple with ``predictions`` (the logits of the model
            as NumPy arrays) and ``label_ids`` (the ground-truth labels as
            NumPy arrays).

    Returns:
        The result of ``metric.compute`` for the predicted class ids against
        the ground-truth labels.
    """
    # The predicted class is the index of the largest logit along axis 1.
    predictions = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)
import torch
def collate_fn(examples):
    """Collate preprocessed examples into one model-ready batch.

    Args:
        examples: Sequence of mappings, each with a "pixel_values" tensor and
            an integer "label". All "pixel_values" tensors must share one
            shape so they can be stacked.

    Returns:
        Dict with "pixel_values" (the per-example tensors stacked along a new
        leading dimension) and "labels" (a 1-D tensor of the labels, in the
        same order as ``examples``).
    """
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}
# Wire the model, arguments, datasets, collator and metric function together.
trainer = Trainer(
    model=model,
    args=args,
    data_collator=collate_fn,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=image_processor,
    compute_metrics=compute_metrics,
)

# Run fine-tuning and keep the returned result for its metrics.
train_results = trainer.train()

# Persist the model, then log and save the training metrics and trainer state.
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()
Environment:
- Python 3.10.14
- pip 24.0
- tensorflow 2.10.0
- torch 1.11.0
- torchvision 0.12.0
- transformers 4.40.0
- scikit-learn 1.3.0
- safetensors 0.4.3
- pillow 10.3.0
- opencv 4.9.0.80
- numpy 1.26.4
- datasets 2.19.0
- fsspec 2023.9.2
- ipywidgets 8.1.2
- huggingface-hub 0.22.2

I’m using Conda on WSL2 (Ubuntu 22.04.3).