Here’s my code:
from sagemaker.huggingface import HuggingFaceModel
hub = {
'HF_MODEL_ID':'EleutherAI/gpt-j-6B', # model_id from hf.co/models
'HF_TASK':'question-answering' # NLP task you want to use for predictions
}
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
env=hub,
role=role, # iam role with permissions to create an Endpoint
transformers_version="4.6", # transformers version used
pytorch_version="1.7", # pytorch version used
py_version="py36", # python version of the DLC
)
predictor = huggingface_model.deploy(
initial_instance_count=1,
instance_type="ml.g4dn.12xlarge"
)
predictor.predict({
'inputs': "How are you?"
})
And here’s the error thrown on predict
ModelError Traceback (most recent call last)
in
1 predictor.predict({
----> 2 ‘inputs’: “How are you?”
3 })
/opt/conda/lib/python3.7/site-packages/sagemaker/predictor.py in predict(self, data, initial_args, target_model, target_variant, inference_id)
159 data, initial_args, target_model, target_variant, inference_id
160 )
→ 161 response = self.sagemaker_session.sagemaker_runtime_client.invoke_endpoint(**request_args)
162 return self._handle_response(response)
163
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
355 “%s() only accepts keyword arguments.” % py_operation_name)
356 # The “self” in this scope is referring to the BaseClient.
→ 357 return self._make_api_call(operation_name, kwargs)
358
359 _api_call.name = str(py_operation_name)
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
674 error_code = parsed_response.get(“Error”, {}).get(“Code”)
675 error_class = self.exceptions.from_code(error_code)
→ 676 raise error_class(parsed_response, operation_name)
677 else:
678 return parsed_response
ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
“code”: 400,
“type”: “InternalServerException”,
“message”: “\u0027gptj\u0027”
}
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/huggingface-pytorch-inference-2023-02-24-22-12-33-444 in account 748895571360 for more information.
Lastly here are the cloudwatch logs, pls help! Thank you so much
2023-02-24 22:33:03,967 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Prediction error
2023-02-24 22:33:03,967 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Traceback (most recent call last):
2023-02-24 22:33:03,967 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/sagemaker_huggingface_inference_toolkit/handler_service.py”, line 207, in handle
2023-02-24 22:33:03,967 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - self.initialize(context)
2023-02-24 22:33:03,967 [INFO ] W-9000-EleutherAI__gpt-j-6B com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 2
2023-02-24 22:33:03,967 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/sagemaker_huggingface_inference_toolkit/handler_service.py”, line 78, in initialize
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - self.model = self.load(self.model_dir)
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/sagemaker_huggingface_inference_toolkit/handler_service.py”, line 105, in load
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - hf_pipeline = get_pipeline(task=os.environ[“HF_TASK”], model_dir=model_dir, device=self.device)
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/sagemaker_huggingface_inference_toolkit/transformers_utils.py”, line 230, in get_pipeline
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - hf_pipeline = pipeline(task=task, model=model_dir, tokenizer=model_dir, device=device, **kwargs)
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/transformers/pipelines/init.py”, line 389, in pipeline
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - framework, model = infer_framework_from_model(model, targeted_task, revision=revision, task=task)
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/transformers/pipelines/base.py”, line 88, in infer_framework_from_model
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - model = model_class.from_pretrained(model, **model_kwargs)
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/transformers/models/auto/auto_factory.py”, line 376, in from_pretrained
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/transformers/models/auto/configuration_auto.py”, line 432, in from_pretrained
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - config_class = CONFIG_MAPPING[config_dict[“model_type”]]
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - KeyError: ‘gptj’
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle -
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - During handling of the above exception, another exception occurred:
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle -
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Traceback (most recent call last):
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/mms/service.py”, line 108, in predict
2023-02-24 22:33:03,968 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - ret = self._entry_point(input_batch, self.context)
2023-02-24 22:33:03,968 [INFO ] W-9000-EleutherAI__gpt-j-6B ACCESS_LOG - /169.254.178.2:56264 “POST /invocations HTTP/1.1” 400 5
2023-02-24 22:33:03,969 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - File “/opt/conda/lib/python3.6/site-packages/sagemaker_huggingface_inference_toolkit/handler_service.py”, line 231, in handle
2023-02-24 22:33:03,969 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - raise PredictionException(str(e), 400)
2023-02-24 22:33:03,969 [INFO ] W-EleutherAI__gpt-j-6B-3-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - mms.service.PredictionException: ‘gptj’ : 400