I am trying to deploy Sagemaker Endpoint with custom scripts using model_fn, predict fn and input_fn using sqlcoder7b-2 and I am getting error
TypeError: model_fn() takes 1 positional argument but 2 were given
#code/inference.py
def model_fn(model_dir):
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(
model_dir,
trust_remote_code=True,
torch_dtype=torch.float16,
device_map="auto",
use_cache=True,
cache_dir ="sqlcoder",
offload_folder="offload_sqlcoder",
force_download=True
)
model_dict = {'model':model, 'tokenizer':tokenizer}
return model_dict
def predict_fn(data, model_dict):
tokenizer = model_dict['tokenizer']
model = model_dict['model']
question=data["inputs"]
print("Question is",question)
print("Prompt is",prompt)
updated_prompt = prompt.format(question=question)
print("Updated Prompt is",prompt)
inputs = tokenizer(updated_prompt, return_tensors="pt").to("cuda")
generated_ids = model.generate(
**inputs,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.eos_token_id,
max_new_tokens=400,
do_sample=False,
num_beams=1,
)
outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
torch.cuda.empty_cache()
torch.cuda.synchronize()
return sqlparse.format(outputs[0].split("[SQL]")[-1], reindent=True)
def input_fn(request_body, request_content_type):
# Transform the input request to a dictionary
print("Request body is",request_body)
print("Request Content Type is", request_content_type)
request = json.loads(request_body)
return request
#Deployment Script
from sagemaker.huggingface.model import HuggingFaceModel
hub = {
# 'HF_MODEL_ID':'defog/sqlcoder-7b-2', # model_id from hf.co/models
'HF_TASK':'text-generation' # NLP task you want to use for predictions
}
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
model_data=s3_location, # path to your model and script
role=role, # iam role with permissions to create an Endpoint
transformers_version="4.37.0", # transformers version used
pytorch_version="2.1.0", # pytorch version used
py_version='py310', # python version used
env=hub
)
# deploy the endpoint endpoint
predictor = huggingface_model.deploy(
initial_instance_count=1,
instance_type="ml.g5.xlarge"
)