My RAM usage keeps increasing when doing speech-to-text with distil-whisper/distil-small.en
My server is deployed on a GPU, and its RAM usage keeps increasing and is never released.
I see the same behaviour on CPU when running locally: RAM usage grows with every request.
# Imports used by the snippets below
import logging
import torch
from memory_profiler import memory_usage
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

logger = logging.getLogger(__name__)

def transcribe(self, arr):
    mem_usage = memory_usage(max_usage=True)  # max memory usage of the process in MB
    logger.info(f"Current memory usage in transcriber before generating output: {mem_usage} MB")
    text = self.transcriber(arr)
    # Clean up: drop the input reference and release cached GPU memory
    del arr
    torch.cuda.empty_cache()
    mem_usage = memory_usage(max_usage=True)  # max memory usage of the process in MB
    logger.info(f"Current memory usage in transcriber after generating output: {mem_usage} MB")
    return text
def __init__(self) -> None:
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float32
    logger.info(torch_dtype)
    logger.info(device)
    model_id = "distil-whisper/distil-small.en"
    self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=False,
        use_safetensors=True,
        use_cache=False,
    )
    self.model.to(device)
    self.processor = AutoProcessor.from_pretrained(model_id)
    self.transcriber = pipeline(
        "automatic-speech-recognition",
        model=self.model,
        tokenizer=self.processor.tokenizer,
        feature_extractor=self.processor.feature_extractor,
        max_new_tokens=128,
        use_fast=False,
        chunk_length_s=10,
        batch_size=8,
        torch_dtype=torch_dtype,
        device=device,
    )
    logger.info("Model loaded")
Above is the sample code I am using. The memory grows by around 40 MB between the two log lines on every call, and sometimes the increase is as high as 200 MB.
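One variation I am planning to test is running generation without autograd state and forcing Python garbage collection after each request. Whether this actually stops the growth is only my assumption; gc and torch.inference_mode are standard, and the method below just mirrors my transcribe above:

import gc

def transcribe(self, arr):
    # Run the pipeline without building any autograd state
    with torch.inference_mode():
        text = self.transcriber(arr)
    del arr
    gc.collect()  # force collection of any lingering Python objects
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # release cached CUDA blocks back to the driver
    return text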
Please help if anyone has any idea about what is holding on to this memory.
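In case it helps anyone narrow this down, this is roughly how I plan to instrument the call with tracemalloc from the standard library. Note that tracemalloc only sees Python-level allocations, so native or CUDA buffers will not show up in it; the snippet assumes it runs inside my transcribe method:

import tracemalloc

tracemalloc.start(25)  # record up to 25 stack frames per allocation

before = tracemalloc.take_snapshot()
text = self.transcriber(arr)  # the same pipeline call as in transcribe
after = tracemalloc.take_snapshot()

# Log the ten call sites whose Python allocations grew the most
for stat in after.compare_to(before, "lineno")[:10]:
    logger.info(stat)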