Error with accelerate when fine-tuning LLaVa

Hello,

I am trying to fine-tune a LLaVa model, but I get the following error when training starts.

  File "/app/MIDOGpp-main/train_llava.py", line 287, in fine_tune_llava
    trainer.train()
  File "/usr/local/lib/python3.10/site-packages/transformers/trainer.py", line 2171, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/site-packages/transformers/trainer.py", line 2200, in _inner_training_loop
    train_dataloader = self.get_train_dataloader()
  File "/usr/local/lib/python3.10/site-packages/transformers/trainer.py", line 1018, in get_train_dataloader
    return self.accelerator.prepare(DataLoader(train_dataset, **dataloader_params))
TypeError: intercept_args() got an unexpected keyword argument 'persistent_workers'

I have the following code in my function:

import albumentations as A
from sklearn.model_selection import StratifiedKFold
from transformers import Trainer, TrainingArguments

def fine_tune_llava(train_data, epochs=3, lr=1e-5):
    # Load model
    model, processor = load_llava_model()
    
    # MIDOG parameters
    mean = [0.707464, 0.439671, 0.72065]
    std = [0.118969, 0.127489, 0.083481]
    
    # Images transformations
    data_transforms = {
        'train': A.Compose([
            A.Resize(224, 224),
            A.OneOf([
                A.HorizontalFlip(p=1),
                A.VerticalFlip(p=1),
                A.RandomRotate90(p=1),
            ], p=0.5),
            StainAugmentor(stain_matrix=get_target_stain_matrix()),
            A.RandomBrightnessContrast(p=0.5, brightness_limit=(-0.5, 0.5), contrast_limit=(-0.5, 0.5)),
            A.Affine(p=0.5, scale=(1.0, 2.0), shear=(-20, 20)),
            A.Normalize(mean, std),
        ], p=1.0),
        'val': A.Compose([
            A.Resize(224, 224),
            A.Normalize(mean, std),
        ], p=1.0),
        'test': A.Compose([
            A.Resize(224, 224),
            A.Normalize(mean, std),
        ], p=1.0)
    }
    
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    labels = [item[2] for item in train_data]
    
    fold = 1
    for train_index, val_index in skf.split(train_data, labels):
      print(f"Entrenando en el pliegue {fold}/5")
      
      # Divide val and train
      train_fold_data = [train_data[i] for i in train_index]
      val_fold_data = [train_data[i] for i in val_index]
        
      # Create Dataset
      train_dataset = ROIDataset(train_fold_data, processor, transform=data_transforms['train'])
      val_dataset = ROIDataset(val_fold_data, processor, transform=data_transforms['val'])
  
      # Define training arguments
      training_args = TrainingArguments(
          output_dir=f"./results_fold_{fold}",
          eval_strategy="epoch",
          learning_rate=lr,
          per_device_train_batch_size=16,
          per_device_eval_batch_size=16,
          num_train_epochs=epochs,
          weight_decay=0.0005,
          tf32=True,
          bf16=True,
          logging_dir=f"./logs_fold_{fold}",
          logging_steps=10,
      )
  
      # Initialize Trainer
      trainer = Trainer(
          model=model,
          args=training_args,
          train_dataset=train_dataset,
          eval_dataset=val_dataset,
          processing_class=processor.tokenizer,
      )
  
      # Fine-tuning the model
      trainer.train()
      
      fold += 1

I have tried updating transformers and accelerate, but it is still not working:

pip install --upgrade transformers accelerate
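
For what it's worth, a quick check like the following (just a sketch) can confirm which versions and installs are actually imported at runtime, since the intercept_args wrapper in the traceback suggests something in the environment is wrapping torch's DataLoader.__init__:

# Sanity check: confirm which versions (and which installs) get imported at
# runtime, since a stale copy in another site-packages can shadow the
# upgraded one.
import torch
import transformers
import accelerate

print("torch       :", torch.__version__, torch.__file__)
print("transformers:", transformers.__version__, transformers.__file__)
print("accelerate  :", accelerate.__version__, accelerate.__file__)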


It looks more like a DataLoader error in torch than a transformers error. Maybe the torch version or data format is wrong.
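
If it helps to narrow it down, a minimal check outside of Trainer might look like this (assuming train_dataset is one of the ROIDataset instances from your post):

from torch.utils.data import DataLoader

# Minimal check outside of Trainer: the traceback fails while *constructing*
# the DataLoader, so building one directly with persistent_workers is enough
# to reproduce the problem if it comes from torch or a patched DataLoader.
loader = DataLoader(
    train_dataset,
    batch_size=16,
    num_workers=2,
    persistent_workers=True,  # the kwarg the TypeError complains about
)
print("DataLoader constructed without errors:", loader)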

I have torch 2.5.1 and the dataset is created like this:

import numpy as np
import torch
from PIL import Image

class ROIDataset(torch.utils.data.Dataset):
    def __init__(self, data, processor, transform):
        self.data = data
        self.transform = transform
        self.processor = processor

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path, bbox, category_id = self.data[idx]
        image = Image.open(image_path).convert("RGB")
        roi = image.crop(bbox)
        if self.transform:
            roi = roi.convert("RGB")
            roi = np.array(roi)
            roi_transformed = self.transform(image=roi)
            roi = roi_transformed['image']
            if roi.dtype == np.float32:
                roi = (roi * 255).astype(np.uint8)
            roi = Image.fromarray(roi)
        
        # Generate the prompts
        if category_id == 0:
            prompt = "USER: <image>\nIs a mitotic cell in this image?\nASSISTANT: Yes, there is a mitotic cell in this image."
        else:
            prompt = "USER: <image>\nIs a mitotic cell in this image?\nASSISTANT: No, there is no mitotic cell in this image."

        # Process the text
        processed = self.processor(text=prompt, images=roi, padding=True, truncation=True, return_tensors="pt")
        labels = processed["input_ids"].clone()
        labels[labels == self.processor.tokenizer.pad_token_id] = -100
        processed["labels"] = labels
        
        return processed["input_ids"], processed["attention_mask"], processed["pixel_values"], processed["labels"]

If you use the PyTorch Dataset class instead of the Dataset class from the Hugging Face datasets library, you're likely to run into errors. There is some compatibility, but the behavior is different…
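
As a rough sketch of what that could look like (the field names and preprocessing here are illustrative, reusing the processor and the (image_path, bbox, category_id) tuples from the post):

from datasets import Dataset
from PIL import Image

def to_records(data):
    # data is the same list of (image_path, bbox, category_id) tuples as in the post
    return [{"image_path": p, "bbox": list(b), "category_id": c} for p, b, c in data]

def preprocess(batch):
    # with_transform passes columns as lists, so this runs lazily on each accessed batch
    images, prompts = [], []
    for path, bbox, cat in zip(batch["image_path"], batch["bbox"], batch["category_id"]):
        roi = Image.open(path).convert("RGB").crop(tuple(bbox))
        answer = ("Yes, there is a mitotic cell in this image."
                  if cat == 0 else "No, there is no mitotic cell in this image.")
        prompts.append(f"USER: <image>\nIs a mitotic cell in this image?\nASSISTANT: {answer}")
        images.append(roi)
    processed = processor(text=prompts, images=images, padding=True, return_tensors="pt")
    processed["labels"] = processed["input_ids"].clone()
    return processed

train_dataset = Dataset.from_list(to_records(train_fold_data)).with_transform(preprocess)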