SFTTrainer formatting_func is slower and yields higher loss when it returns multiple samples

With all other parameters held constant, why does the second formatting_func train more slowly and produce a higher loss than the first?

def formatting_func_single(example):
  # Formats only the first quote/author pair and returns a single text.
  text = f"Quote: {example['quote'][0]}\nAuthor: {example['author'][0]}"
  return [text]

def formatting_func_multi(example):
  # Formats every quote/author pair and returns one text per pair.
  output_texts = []
  for i in range(len(example["quote"])):
    text = f"Quote: {example['quote'][i]}\nAuthor: {example['author'][i]}"
    output_texts.append(text)
  return output_texts
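
For context, in trl 0.7.10 SFTTrainer applies formatting_func to batches of examples (it tokenizes via datasets.map with batched=True), so example['quote'] is a list rather than a single string. A standalone check with a toy batch (the dict below only mimics the dataset's column structure):

batch = {"quote": ["q1", "q2", "q3"], "author": ["a1", "a2", "a3"]}
print(len(formatting_func_single(batch)))  # 1 text for the whole batch
print(len(formatting_func_multi(batch)))   # 3 texts, one per example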

Results are in the screenshot below (see loss and train_steps_per_second):

Full code (can be run in a Colab on a T4):

!pip3 install -q -U huggingface_hub==0.21.4
!pip3 install -q -U bitsandbytes==0.42.0
!pip3 install -q -U peft==0.8.2
!pip3 install -q -U trl==0.7.10
!pip3 install -q -U accelerate==0.27.1
!pip3 install -q -U datasets==2.17.0
!pip3 install -q -U transformers==4.38.1

# %

import huggingface_hub
import torch
import os
import transformers
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

os.environ["WANDB_DISABLED"] = "true"

# google/gemma-2b is gated on the Hub; authenticate before downloading it.
huggingface_hub.notebook_login()

# %

model_id = "google/gemma-2b"
# 4-bit NF4 quantization with bfloat16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# device_map={"":0} places the entire model on GPU 0
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map={"":0})
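
As a sanity check that the 4-bit load worked, the model's memory footprint can be printed (get_memory_footprint is a standard transformers method):

print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")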

# %

from datasets import load_dataset

data = load_dataset("Abirate/english_quotes")
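
For reference, each record in Abirate/english_quotes has quote, author, and tags columns; the formatting functions index into the first two:

print(data)
print(data["train"][0])  # {'quote': '...', 'author': '...', 'tags': [...]}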

# %

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

# %

def formatting_func_single(example):
  # Formats only the first quote/author pair and returns a single text.
  text = f"Quote: {example['quote'][0]}\nAuthor: {example['author'][0]}"
  return [text]

def formatting_func_multi(example):
  # Formats every quote/author pair and returns one text per pair.
  output_texts = []
  for i in range(len(example["quote"])):
    text = f"Quote: {example['quote'][i]}\nAuthor: {example['author'][i]}"
    output_texts.append(text)
  return output_texts

# %

# @title Train using formatting_func_single
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    max_seq_length=128,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func_single,
)
trainer.train()
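
To see how many training rows this formatting function actually produced, the processed dataset can be inspected (assuming trl 0.7.10 keeps the formatted, tokenized dataset on trainer.train_dataset after construction):

# Rows produced by formatting_func_single after SFTTrainer preprocessing
print(len(trainer.train_dataset))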

# %

# @title Train using formatting_func_multi
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    max_seq_length=128,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func_multi,
)
trainer.train()
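
And the same check after the second run, to compare how many rows formatting_func_multi produced:

# Rows produced by formatting_func_multi after SFTTrainer preprocessing
print(len(trainer.train_dataset))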