.map function overloads my cache

Hi, I'm currently trying to map about 5,000 audio files into my dataset in AWS SageMaker. When I try to do that, my kernel crashes after a few minutes; I assume it's overloading my cache memory… When I use map with only 1,000 files it works fine! Does anybody know what I can do about that? This is my code and the function I use to process my data:

train_dataset = train_dataset.map(preprocess_function, batched=True, batch_size=None, keep_in_memory=True, load_from_cache_file=False)

import numpy as np  # needed for np.pad

def preprocess_function(examples):
    audio_arrays = [list(x["array"]) for x in examples["audio"]]

    # Pad every clip with zeros up to the length of the longest clip in the batch
    max_length_audio = max(len(audio) for audio in audio_arrays)
    audio_arrays_padded = [
        np.pad(audio, (0, max_length_audio - len(audio)))
        if len(audio) < max_length_audio
        else audio[:max_length_audio]
        for audio in audio_arrays
    ]
    print(audio_arrays_padded[0][:10])  # debug: first few samples of the first padded clip

    text_list = examples["transcription"]

    input_data = processor(
        audio=audio_arrays_padded,
        text_target=text_list,
        sampling_rate=16000,
        return_tensors="pt",
        return_attention_mask=True,
        padding="longest",
    )

    # Debug: inspect the shapes of the processed batch
    print(input_data)
    print(input_data["input_values"].shape)
    print(input_data["attention_mask"].shape)
    print(input_data["labels"].shape)
    print(input_data["decoder_attention_mask"].shape)

    return {
        "input_values": input_data["input_values"],
        "attention_mask": input_data["attention_mask"],
        "labels": input_data["labels"],
        "decoder_attention_mask": input_data["decoder_attention_mask"],
    }