I have this problem with 4x10. Below is my code. Does anyone have a solution for this problem?
I do not have this problem with 4xt4.
# Checkpoint identifier shared by the model loader and the tokenizer loader.
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Load the causal-LM weights. `quantization_config` is defined outside this
# excerpt (presumably a 4-bit setup, judging by the variable name -- confirm).
model_4bit = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quantization_config,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
with torch.no_grad():
inputs = tokenizer(template, return_tensors="pt").to(model_4bit.device)
outputs = model_4bit.generate(**inputs,
max_new_tokens=1024,
num_beams=1,
do_sample=False,
use_cache=True,
eos_token_id=tokenizer.eos_token_id
)
gc.collect()
torch.cuda.empty_cache()
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
response = response.split('[/INST]')[1]
output = response.strip()
return {"data": output}, 201