Hey!
I have been experiencing this error and have tried diagnosing it, but I am not sure where the problem might be.
UnexpectedStatusException: Error hosting endpoint summarization-endpoint: Failed. Reason: Please make sure all images included in the model for the production variant AllTraffic exist, and that the execution role used to create the model has permissions to access them..
I initially thought it had to do with IAM permissions, but I am no longer sure that is the case, since I added all the permissions that might be relevant and I don't think it's an issue of the resource not being assigned to the right policy. The model is being created, but the endpoint is not being processed correctly. I also considered whether my model.tar.gz was corrupted, but even when I tried deploying a model directly from the Hugging Face Hub, I was met with the same error message. In addition, for some reason no CloudWatch logs are being saved, despite the CloudWatch Log Group being created for /aws/sagemaker/Endpoints/summarization-endpoint and the role having all the relevant permissions.
The script is below:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import BytesDeserializer
import sagemaker
# Deploy a fine-tuned Hugging Face summarization model to a SageMaker
# real-time endpoint and send it one test request.

# HuggingFaceModel is used below but was never imported, which makes the
# script fail with a NameError before anything is sent to SageMaker.
from sagemaker.huggingface import HuggingFaceModel

# Names under which the SageMaker model and the endpoint are created.
model_name = 'summarization-model'
endpoint_name = 'summarization-endpoint'

# IAM role SageMaker assumes to create the model and host the endpoint.
# NOTE(review): the reported "make sure all images ... exist, and that the
# execution role ... has permissions to access them" failure is about pulling
# the inference container image. Presumably this role also needs the ECR read
# actions (ecr:GetAuthorizationToken, ecr:BatchCheckLayerAvailability,
# ecr:GetDownloadUrlForLayer, ecr:BatchGetImage) -- confirm against the
# SageMaker execution-role documentation.
role = sagemaker.get_execution_role()

# Alternative (disabled): deploy a model straight from the Hugging Face Hub
# (https://huggingface.co/models) instead of from an S3 archive.
# hub = {
#     'HF_MODEL_ID': 'google/pegasus-large',
#     'HF_TASK': 'summarization',
# }
# huggingface_model = HuggingFaceModel(
#     transformers_version='4.6.1',
#     pytorch_version='1.7.1',
#     py_version='py36',
#     env=hub,
#     role=role,
# )

# Create the Hugging Face model from the trained artifact stored in S3.
huggingface_model = HuggingFaceModel(
    model_data="s3://qfn-transcription/ujjawal_files/model.tar.gz",  # path to your trained sagemaker model
    role=role,  # iam role with permissions to create an Endpoint
    transformers_version="4.6.1",  # transformers version used
    pytorch_version="1.7.1",  # pytorch version used
    py_version='py36',
    name=model_name,
)

# Deploy the model to SageMaker Inference.
predictor = huggingface_model.deploy(
    initial_instance_count=1,  # number of instances
    instance_type='ml.g4dn.xlarge',  # other types noted by the author: 'ml.m5.xlarge', ml.inf1.xlarge
    endpoint_name=endpoint_name,
)

# Send a single summarization request to the freshly deployed endpoint.
predictor.predict({
    'inputs': "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
})
Thanks!
IAM Permissions:
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObject",
"iam:GetRole",
"iam:PassRole",
"sagemaker:GetRecord"
],
"Resource": [
"arn:aws:sagemaker:*:216283767174:feature-group/*",
"arn:aws:s3:::qfn-transcription/*",
"arn:aws:iam::216283767174:role/callTranscriptionsRole"
]
},
{
"Sid": "VisualEditor1",
"Effect": "Allow",
"Action": [
"sagemaker:CreateModel",
"logs:GetLogRecord",
"logs:DescribeSubscriptionFilters",
"logs:StartQuery",
"logs:DescribeMetricFilters",
"ecr:BatchDeleteImage",
"logs:ListLogDeliveries",
"ecr:DeleteRepository",
"logs:CreateLogStream",
"logs:TagLogGroup",
"logs:CancelExportTask",
"logs:GetLogEvents",
"logs:FilterLogEvents",
"logs:DescribeDestinations",
"sagemaker:CreateEndpoint",
"logs:StopQuery",
"cloudwatch:GetMetricStatistics",
"logs:CreateLogGroup",
"ecr:PutImage",
"logs:PutMetricFilter",
"logs:CreateLogDelivery",
"servicecatalog:ListAcceptedPortfolioShares",
"sagemaker:CreateEndpointConfig",
"logs:PutResourcePolicy",
"logs:DescribeExportTasks",
"sagemaker:ListActions",
"logs:GetQueryResults",
"sagemaker:DescribeEndpointConfig",
"logs:UpdateLogDelivery",
"ecr:BatchGetImage",
"logs:PutSubscriptionFilter",
"ecr:InitiateLayerUpload",
"logs:ListTagsLogGroup",
"sagemaker:EnableSagemakerServicecatalogPortfolio",
"logs:DescribeLogStreams",
"ecr:UploadLayerPart",
"logs:GetLogDelivery",
"cloudwatch:ListMetrics",
"servicecatalog:AcceptPortfolioShare",
"logs:CreateExportTask",
"ecr:CompleteLayerUpload",
"logs:AssociateKmsKey",
"sagemaker:DescribeEndpoint",
"logs:DescribeQueryDefinitions",
"logs:PutDestination",
"logs:DescribeResourcePolicies",
"ecr:DeleteRepositoryPolicy",
"logs:DescribeQueries",
"logs:DisassociateKmsKey",
"sagemaker:DeleteApp",
"logs:UntagLogGroup",
"logs:DescribeLogGroups",
"logs:PutDestinationPolicy",
"logs:TestMetricFilter",
"logs:PutQueryDefinition",
"logs:DeleteDestination",
"logs:PutLogEvents",
"s3:ListAllMyBuckets",
"ecr:SetRepositoryPolicy",
"logs:PutRetentionPolicy",
"logs:GetLogGroupFields"
],
"Resource": "*"
},
{
"Sid": "VisualEditor2",
"Effect": "Allow",
"Action": [
"s3:ListBucket",
"sagemaker:CreateApp"
],
"Resource": [
"arn:aws:sagemaker:*:216283767174:app/*/*/*/*",
"arn:aws:s3:::qfn-transcription/*"
]
},
{
"Sid": "VisualEditor3",
"Effect": "Allow",
"Action": "sagemaker:DescribeApp",
"Resource": "arn:aws:sagemaker:*:216283767174:app/*/*/*/*"
},
{
"Sid": "VisualEditor4",
"Effect": "Allow",
"Action": [
"sagemaker:DescribeTrainingJob",
"sagemaker:CreateMonitoringSchedule",
"sagemaker:PutRecord",
"sagemaker:CreateTrainingJob",
"sagemaker:CreateProcessingJob"
],
"Resource": [
"arn:aws:sagemaker:*:216283767174:feature-group/*",
"arn:aws:sagemaker:*:216283767174:monitoring-schedule/*",
"arn:aws:sagemaker:*:216283767174:processing-job/*",
"arn:aws:sagemaker:*:216283767174:training-job/*"
]
},
{
"Sid": "VisualEditor5",
"Effect": "Allow",
"Action": [
"sagemaker:DescribeNotebookInstanceLifecycleConfig",
"sagemaker:StopNotebookInstance",
"sagemaker:DescribeNotebookInstance"
],
"Resource": [
"arn:aws:sagemaker:*:216283767174:feature-group/*",
"arn:aws:sagemaker:*:216283767174:notebook-instance-lifecycle-config/*",
"arn:aws:sagemaker:*:216283767174:notebook-instance/*"
]
},
{
"Sid": "VisualEditor6",
"Effect": "Allow",
"Action": [
"ecr:SetRepositoryPolicy",
"ecr:CompleteLayerUpload",
"ecr:BatchGetImage",
"ecr:BatchDeleteImage",
"ecr:UploadLayerPart",
"ecr:DeleteRepositoryPolicy",
"ecr:InitiateLayerUpload",
"ecr:DeleteRepository",
"ecr:PutImage"
],
"Resource": "arn:aws:ecr:*:*:repository/*"
}
]
}