Hi, I’m trying to deploy the google/flan-t5-xl model via SageMaker like so:
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

role = sagemaker.get_execution_role()

# Hub configuration: which model to pull from the Hub and which pipeline task to serve
hub = {
    'HF_MODEL_ID': 'google/flan-t5-xl',
    'HF_TASK': 'text-generation'
}

huggingface_model = HuggingFaceModel(
    env=hub,
    role=role,
    transformers_version="4.6",
    pytorch_version="1.7",
    py_version="py36",
)

predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.4xlarge",
    endpoint_name="flan-t5-demo"
)
Everything works up to this point, but as soon as I call:
question = "Which instances can I use with Managed Spot Training in SageMaker?"
out = predictor.predict({"inputs": question})
out
I see the following error:
ModelError Traceback (most recent call last)
<ipython-input-5-991f8ae0e78c> in <module>
1 question = "Which instances can I use with Managed Spot Training in SageMaker?"
2
----> 3 out = predictor.predict({"inputs": question})
4 out
/opt/conda/lib/python3.7/site-packages/sagemaker/base_predictor.py in predict(self, data, initial_args, target_model, target_variant, inference_id, custom_attributes)
183 custom_attributes,
184 )
--> 185 response = self.sagemaker_session.sagemaker_runtime_client.invoke_endpoint(**request_args)
186 return self._handle_response(response)
187
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
528 )
529 # The "self" in this scope is referring to the BaseClient.
--> 530 return self._make_api_call(operation_name, kwargs)
531
532 _api_call.__name__ = str(py_operation_name)
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
962 error_code = parsed_response.get("Error", {}).get("Code")
963 error_class = self.exceptions.from_code(error_code)
--> 964 raise error_class(parsed_response, operation_name)
965 else:
966 return parsed_response
ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
"code": 400,
"type": "InternalServerException",
"message": "Unrecognized configuration class \u003cclass \u0027transformers.models.t5.configuration_t5.T5Config\u0027\u003e for this kind of AutoModel: AutoModelForCausalLM.\nModel type should be one of BigBirdPegasusConfig, GPTNeoConfig, BigBirdConfig, CamembertConfig, XLMRobertaConfig, RobertaConfig, BertConfig, OpenAIGPTConfig, GPT2Config, TransfoXLConfig, XLNetConfig, XLMConfig, CTRLConfig, ReformerConfig, BertGenerationConfig, XLMProphetNetConfig, ProphetNetConfig, BartConfig, MBartConfig, PegasusConfig, MarianConfig, BlenderbotConfig, BlenderbotSmallConfig, MegatronBertConfig."
}
".
How can I fix this?
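My guess is that HF_TASK should be 'text2text-generation' rather than 'text-generation' (FLAN-T5 is a seq2seq model), and that the 4.6/1.7/py36 image is probably too old to know about FLAN-T5 anyway. Would something like the following be the right direction? This is an untested sketch, and the version strings are only my guess at a currently supported Hugging Face DLC combination:

# Untested sketch of the change I'm considering.
hub = {
    'HF_MODEL_ID': 'google/flan-t5-xl',
    'HF_TASK': 'text2text-generation'  # seq2seq task instead of causal LM
}

huggingface_model = HuggingFaceModel(
    env=hub,
    role=role,
    transformers_version="4.26",  # guessed; a DLC recent enough for FLAN-T5
    pytorch_version="1.13",       # guessed to match
    py_version="py39",            # guessed to match
)

Any pointers appreciated!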