Embedding Vectors taking up large amounts of memory

Hello, so I am trying to use carptriever-1 to embed batches of documents (>800-1000) usually, and even trying to embed 50 documents takes up 10+ GB of memory with the code below. Any advice on how I can resolve this issue?

from transformers import AutoTokenizer, AutoModel
import torch

device = "cpu" #issue occurs with CPU or cuda
model = AutoModel.from_pretrained("CarperAI/carptriever-1", add_pooling_layer=False)
tokenizer = AutoTokenizer.from_pretrained("CarperAI/carptriever-1")

def mean_pooling(token_embeddings, mask):
    token_embeddings = token_embeddings.masked_fill(~mask[..., None].bool(), 0.)
    sentence_embeddings = token_embeddings.sum(dim=1) / mask.sum(dim=1)[..., None]
    return sentence_embeddings

class CarptrieverInstance:
    def __init__(self, query, documents):
        self.query = query
        self.documents = documents
        self.sentences = [query] + documents
        self.embeddings = self.embed()
        self.scores = self.search(self.embeddings)

    def embed(self):
        chunks = [self.documents[i:i + 40] for i in range(0, len(self.documents), 40)]
        total_embeddings = []
        for chunk in chunks:
            inputs = tokenizer(chunk, padding=True, truncation=True, return_tensors='pt')
            outputs = model(**inputs)
            embeddings = mean_pooling(outputs[0], inputs['attention_mask'])
            del inputs, outputs, embeddings
        del inputs, outputs, embeddings # trying to clear some memory here, appears to have no effect
        return torch.cat([total_embeddings[0]] + total_embeddings[1:], dim=0).cpu().tolist()

    def search(self, embeddings):
        query_embedding, sentence_embeddings = embeddings[0], embeddings[1:]
        scores = (query_embedding @ sentence_embeddings.transpose(0, 1)).cuda().tolist()
        sentence_score_pairs = sorted(zip(self.sentences[1:], scores), reverse=True)
        sentence_score_pairs.sort(key=lambda x: x[1], reverse=True)
        return sentence_score_pairs