I want to test my PEFT LoRA adapter on DocVQA (Qwen2.5-VL-3B) but I am unable to do so. Here is the code to reproduce the error:
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from transformers import AutoProcessor, AutoModelForImageTextToText, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
min_pixels = 224 * 28 * 28
max_pixels = 224 * 28 * 28
processor = AutoProcessor.from_pretrained(model_id, min_pixels=min_pixels, max_pixels=max_pixels)
processor.tokenizer.padding_side = "right"
base_model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # load in 8-bit precision
    device_map="auto",
)
base_model.enable_input_require_grads()
model = PeftModel.from_pretrained(base_model, "./docmat/iter-6", is_trainable=False)
model.print_trainable_parameters()
from datasets import load_dataset
from tqdm import tqdm
import torch
from collections import defaultdict
def evaluate_docvqa(model, processor, split="test", num_samples=None):
    dataset = load_dataset("lmms-lab/DocVQA", "DocVQA", split=split)
    if num_samples:
        dataset = dataset.select(range(min(num_samples, len(dataset))))
    correct = 0
    results = defaultdict(list)
    model.eval()
    with torch.no_grad():
        for sample in tqdm(dataset):
            # Build the prompt with the processor's chat template so the image
            # placeholder tokens match what Qwen2.5-VL expects
            messages = [
                {"role": "user", "content": [
                    {"type": "image"},
                    {"type": "text", "text": sample['question']},
                ]}
            ]
            prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
            # Process the image and text together
            inputs = processor(
                images=sample['image'],
                text=prompt,
                return_tensors="pt",
                padding=True,
            ).to(device)
            # Generate the answer and decode only the newly generated tokens
            outputs = model.generate(**inputs, max_new_tokens=50, num_beams=5, early_stopping=True)
            generated = outputs[:, inputs['input_ids'].shape[1]:]
            predicted_answer = processor.batch_decode(generated, skip_special_tokens=True)[0].strip()
            # Check if the predicted answer matches any ground-truth answer (exact match)
            is_correct = any(predicted_answer.lower() == ans.lower() for ans in sample['answers'])
            correct += int(is_correct)
            # Store per-sample results
            results['questions'].append(sample['question'])
            results['predictions'].append(predicted_answer)
            results['ground_truth'].append(sample['answers'])
            results['correct'].append(is_correct)
    accuracy = correct / len(dataset)
    return accuracy, results
accuracy, results = evaluate_docvqa(model, processor, num_samples=100)
print(f"Test Accuracy: {accuracy:.4f}")