from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.serverless import ServerlessInferenceConfig
import json
# Hub model configuration: <https://huggingface.co/models>
hub = {
    'HF_MODEL_ID': 'openai/whisper-base',
    'HF_TASK': 'automatic-speech-recognition',
}
# create the Hugging Face Model class
huggingface_model = HuggingFaceModel(
    transformers_version='4.26.0',
    pytorch_version='1.13.1',
    py_version='py39',
    env=hub,
    role=role,  # assumes `role` is a SageMaker execution role ARN, e.g. from sagemaker.get_execution_role()
)
# Specify MemorySizeInMB and MaxConcurrency in the serverless config object
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=3072,
    max_concurrency=2,
)
# deploy the endpoint
predictor = huggingface_model.deploy(
    serverless_inference_config=serverless_config,
)
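The error further down appears when I invoke the endpoint. The call looks roughly like this (a sketch, not my exact code: the DataSerializer content type and the audio file name are assumptions):

from sagemaker.serializers import DataSerializer

# send raw audio bytes to the serverless endpoint
predictor.serializer = DataSerializer(content_type="audio/x-audio")  # assumed content type
with open("sample.wav", "rb") as f:  # placeholder audio file
    audio_bytes = f.read()
result = predictor.predict(audio_bytes)
print(result)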
I've also tried this:
from transformers import pipeline
import torch
# Assuming `model`, `tokenizer`, and `ckpt` (the checkpoint id used as feature extractor) are already defined
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=ckpt,
    framework="pt",
    device=device,
)
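Locally I would then call it on an audio file, something like this (the file name is just a placeholder):

result = pipe("sample.wav")  # hypothetical local audio file
print(result["text"])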
but I got this error:
ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from model with message "{
"code": 400,
"type": "InternalServerException",
"message": "Could not load model /.sagemaker/mms/models/brainer__whisper-medium-korean with any of the following classes: (\u003cclass \u0027transformers.models.auto.modeling_auto.AutoModelForCTC\u0027\u003e, \u003cclass \u0027transformers.models.auto.modeling_auto.AutoModelForSpeechSeq2Seq\u0027\u003e, \u003cclass \u0027transformers.models.whisper.modeling_whisper.WhisperForConditionalGeneration\u0027\u003e)."
}
How can I solve this?