Out of memory error when creating a lot of embeddings

I am trying to create embeddings from a large amount of text, processing it paragraph by paragraph in a loop. I always run out of GPU memory. Any idea why this is happening?

The code looks like this:

import gc
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xxl")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl", device_map="auto", torch_dtype=torch.float16)


def getSentenceEmbedding(sentenceText, languageModel, modelTokenizer):
    sentence_tokens = modelTokenizer(sentenceText, return_tensors="pt")
    sentence_input_ids = sentence_tokens.input_ids  #.to('cuda')
    # run only the encoder and keep its hidden states
    encodings = languageModel.encoder(input_ids=sentence_input_ids, attention_mask=sentence_tokens.attention_mask, return_dict=True)
    # try to free the inputs after each call
    del sentence_input_ids
    del sentence_tokens
    gc.collect()
    torch.cuda.empty_cache()
    # mean-pool the encoder's last hidden state into a single sentence vector
    return torch.mean(encodings.last_hidden_state, dim=1)


Embeddings = []
for paragraph in text:
    torch.cuda.empty_cache()
    Embeddings.append(getSentenceEmbedding(paragraph, model, tokenizer))
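
In case it helps, here is a minimal sketch of how I could log allocated GPU memory per iteration to see where it grows. Only torch.cuda.memory_allocated and torch.cuda.max_memory_allocated are assumed; I have not included numbers from an actual run:

for i, paragraph in enumerate(text):
    emb = getSentenceEmbedding(paragraph, model, tokenizer)
    Embeddings.append(emb)
    # log the currently allocated memory and the peak so far (current CUDA device)
    print(f"step {i}: allocated={torch.cuda.memory_allocated() / 1e9:.2f} GB, "
          f"peak={torch.cuda.max_memory_allocated() / 1e9:.2f} GB")

Note that with device_map="auto" the model may be split across several devices, so these numbers would only be indicative of the default device.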