RuntimeError: CUDA error: device-side assert triggered 4x10


I have this problem on 4x10. Below is my code. Does anyone have a solution for this problem?
I do not have this problem on 4xt4.

# Checkpoint identifier passed to both from_pretrained() calls below.
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Tokenizer for the same checkpoint; the end-of-sequence token is reused
# as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# Model weights. `quantization_config` is defined outside this snippet
# (presumably a 4-bit setup, judging by the variable name -- confirm).
# device_map="auto" leaves device placement to the library.
model_4bit = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quantization_config,
)


 with torch.no_grad():
                inputs = tokenizer(template, return_tensors="pt").to(model_4bit.device)
                outputs = model_4bit.generate(**inputs,
                                              max_new_tokens=1024,
                                              num_beams=1,
                                              do_sample=False,
                                              use_cache=True,
                                              eos_token_id=tokenizer.eos_token_id
                                              )
                gc.collect()
                torch.cuda.empty_cache()
                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
                response = response.split('[/INST]')[1]
                output = response.strip()
                return {"data": output}, 201