I am using this model with Falcon, and it is using too much RAM.

How can I reduce the RAM utilization?

I have tried the following, but I don't know how to use it properly:


import falcon

# Index -> topic-label lookup for the classifier head.  Keys are the string
# forms of the class indices, matching the model config's id2label format.
_TOPIC_LABELS = (
    "n/a",
    "arts and culture",
    "digital",
    "education",
    "climate change",
    "studying abroad",
    "renewable energy",
    "policy dialogues",
    "youth and the world",
    "environmental sustainability",
    "health and well-being",
    "sports",
    "inclusion",
    "employment",
    "european learning mobility",
    "research and innovation",
    "democratic values",
)
id2label = {str(index): label for index, label in enumerate(_TOPIC_LABELS)}
# Load the fine-tuned sequence-classification checkpoint from the local
# "./demo" directory.  NOTE(review): AutoModel is imported but never used
# anywhere in this file — dead weight in the namespace.
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
tokenizer = AutoTokenizer.from_pretrained("./demo")
# NOTE(review): to lower peak RAM while loading, consider
# from_pretrained(..., low_cpu_mem_usage=True) — TODO confirm the installed
# transformers version supports that flag.
model = AutoModelForSequenceClassification.from_pretrained("./demo")

import os
import tempfile
# NOTE(review): every `with` block below re-serializes the full model to a
# throwaway temporary directory (this sequence matches the Hugging Face
# model-sharding tutorial).  None of them modifies the in-memory `model`,
# so none of them reduces RAM usage on its own.
with tempfile.TemporaryDirectory() as tmp_dir:
    # Default save: a single pytorch_model.bin weights file.
    model.save_pretrained(tmp_dir)
    # print(sorted(os.listdir(tmp_dir)))

# Sharded save: weights are split into files of at most 5 MB each, plus an
# index JSON recording which tensor lives in which shard.
with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir, max_shard_size="5MB")
    # print(sorted(os.listdir(tmp_dir)))




with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir, max_shard_size="5MB")
    # NOTE(review): this loads a SECOND full copy of the model while `model`
    # is still alive — it roughly doubles peak RAM rather than reducing it.
    new_model = AutoModelForSequenceClassification.from_pretrained(tmp_dir)


import json
# Inspect the shard index file written by a sharded save.
with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir, max_shard_size="5MB")
    with open(os.path.join(tmp_dir, "pytorch_model.bin.index.json"), "r") as f:
        index = json.load(f)

# print(index.keys())


# Reload the sharded checkpoint into the existing `model` object in place.
from transformers.modeling_utils import load_sharded_checkpoint
with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir, max_shard_size="5MB")
    load_sharded_checkpoint(model, tmp_dir)


import gc
class Home:
    """Falcon resource exposing the text classifier.

    POST /  -> {'wordvec': {label: logit, ...}} sorted by logit, descending.
    GET  /  -> trivial health-check payload.
    """

    def on_post(self, req, resp):
        """Classify the "text" field of the JSON request body."""
        # Function-scope import keeps the fix self-contained; torch is
        # already a transitive dependency of transformers.
        import torch

        data = req.get_media()
        text = data.get("text")
        inputs = tokenizer(text, return_tensors="pt")
        # no_grad() stops autograd from recording the forward pass; without
        # it every request retains activation tensors for a backward pass
        # that never happens, inflating RAM over time.
        with torch.no_grad():
            output = model(**inputs)

        # Pair each logit with its label and sort by logit, highest first.
        # (The previous dict keyed by raw logit value silently dropped a
        # label whenever two logits happened to be exactly equal.)
        logits = output[0][0]
        scored = sorted(
            ((value.item(), id2label[f'{index}']) for index, value in enumerate(logits)),
            key=lambda pair: pair[0],
            reverse=True,
        )
        po = {label: score for score, label in scored}

        # Drop per-request tensors before collecting so their memory can be
        # reclaimed immediately.
        del inputs, output, logits, scored
        gc.collect()
        resp.media = {'wordvec': po}

    def on_get(self, req, resp):
        """Liveness probe; returns a fixed payload."""
        resp.media = {'love': 'is oK'}

# Wire up the WSGI application: a single resource served at the root path.
app = falcon.App()
home = Home()
app.add_route('/', home)