Wav2vec2 latent representation

I am trying to extract latent representation of audio data using wav2vec2 pre-trainedd moel. However, When trying to run with batched data, it throws error “TypeError: list indices must be integers or slices, not str”

import torch
from datasets import Audio, load_dataset,Dataset, DatasetDict, IterableDatasetDict
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, Wav2Vec2Model
from dataclasses import dataclass

def prepare_dataset(batch, processor):
	batch["input_features"] = processor(batch["audio"]["array"], sampling_rate = 16000, return_tensors="pt", padding=True).input_values
	batch["attention_mask"] = torch.ones(batch["input_features"].shape)
	return batch	

model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
test_data = IterableDatasetDict()
test_data["test"] = load_dataset("mozilla-foundation/common_voice_11_0", "en", split="test", use_auth_token=True, streaming=True)
test_data = test_data.cast_column("audio", Audio(sampling_rate=16000))
vectorized_datasets = test_data.map(prepare_dataset, processor, batched=True, batch_size=4, remove_columns=list(next(iter(test_data.values())).features)).with_format("torch")

with torch.no_grad():
	for batch in vectorized_datasets["test"]:
		hidden_states = model(batch["input_features"], attention_mask=batch["attention_mask"]).last_hidden_state
		print(hidden_states)

Request HF team to help me debugging the root cause behind the error.