Configure and deploy models on SageMaker

Deploy the VGG19 model to AWS SageMaker and run inference

Introduction

AWS SageMaker is an excellent ML platform for conducting MLOps, simplifying model deployment, seamlessly integrating with other AWS services, and enabling rapid iteration and experimentation. In this blog post, we’ll delve into the process of deploying a VGG19 model to AWS SageMaker, covering the steps involved in training the model, creating a SageMaker endpoint, and making real-time inferences.

Get the model

The model has already been trained and saved at /home/sagemaker-user/model/vgg19/model_file as a TensorFlow SavedModel; see the earlier blog post for more details on how it was trained.
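For reference, here is a minimal, hypothetical sketch of how such a model could be exported as a SavedModel (the actual training and export are covered in the earlier post; the use of tf.keras.applications.VGG19 here is only an assumption for illustration):

import tensorflow as tf

# Hypothetical export step: save a VGG19-based model in the SavedModel format,
# which is what tf.saved_model.load() expects in the model_fn defined below.
model = tf.keras.applications.VGG19(include_top=False, weights="imagenet")
tf.saved_model.save(model, "/home/sagemaker-user/model/vgg19/model_file")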

Write inference.py

import json
import tensorflow as tf
from PIL import Image
import numpy as np
import requests
import io
import base64

def model_fn(model_dir):
    model = tf.saved_model.load(model_dir)
    return model

def predict_fn(input_data, model):
    outputs = model(input_data)
    return outputs

def input_fn(request_body, request_content_type='application/json'):
    if request_content_type == 'application/json':
        # Parse the image URL or Base64 data from the request body
        data = json.loads(request_body)
        image_data = data['image']  # Assume the JSON contains an 'image' key

        # If it's a URL, download the image
        if image_data.startswith('http'):
            image = Image.open(requests.get(image_data, stream=True).raw)
        else:
            # If it's Base64 encoded, decode it
            image = Image.open(io.BytesIO(base64.b64decode(image_data)))

        # Convert to a format suitable for model input
        image = image.resize((224, 224))       # VGG19 expects 224x224 input
        image = np.array(image) / 255.0        # Normalize to [0, 1]
        image = np.expand_dims(image, axis=0)  # Add batch dimension
        return tf.convert_to_tensor(image, dtype=tf.float32)
    else:
        raise ValueError("Unsupported content type: {}".format(request_content_type))

def output_fn(prediction, response_content_type='application/json'):
    if response_content_type == 'application/json':
        style_output = {k: v.numpy().tolist() for k, v in prediction['style'].items()}
        content_output = {k: v.numpy().tolist() for k, v in prediction['content'].items()}
        result = {
            'style': style_output,
            'content': content_output
        }
        return json.dumps(result)
    else:
        raise ValueError("Unsupported content type: {}".format(response_content_type))
Package the SavedModel together with the serving script into a single archive:

tar -czvf model_package.tar.gz /home/sagemaker-user/model/vgg19/model_file inference.py

Upload the model to S3

import boto3

s3_client = boto3.client('s3')

bucket_name = 'bucket-name'
s3_file_path = 'path/to/model_package.tar.gz'
local_file_path = 'model_package.tar.gz'
s3_client.upload_file(local_file_path, bucket_name, s3_file_path)

Upload a custom Docker image to AWS ECR

Authenticate to the AWS ECR registries

REGION=<my_aws_region>
ACCOUNT=<my_aws_account>

# Authenticate Docker to the AWS ECR registry that hosts the SageMaker base images (needed to pull the base image)
aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin <docker_registry_url>.dkr.ecr.$REGION.amazonaws.com

# Log in to your private AWS ECR registry (needed to push the custom image)
aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin $ACCOUNT.dkr.ecr.$REGION.amazonaws.com

Generate requirements.txt for the model dependencies

packages = """
IPython
numpy
Pillow
tensorflow
matplotlib
requests
"""

with open('/home/sagemaker-user/model/vgg19/requirements.txt', 'w') as f:
    f.write(packages.strip())

Write the Dockerfile

# Use the SageMaker TensorFlow inference image as the base image
# e.g. 763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:2.0.0-gpu-py310 (region: us-east-1)
FROM <docker_registry_url>.dkr.ecr.<my_aws_region>.amazonaws.com/tensorflow-inference:2.0.0-gpu-py310

# Copy the requirements file into the image (it must sit in the Docker build context)
# and install the additional dependencies
COPY requirements.txt /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt

Build and push the image

# Build from the directory containing the Dockerfile and requirements.txt
docker build -t vgg19-image .

# Create the AWS ECR repository
aws ecr create-repository --repository-name vgg19-image

# Tag the image
docker tag vgg19-image:latest $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/vgg19-image:latest

# Push the tagged image to the AWS ECR repository
docker push $ACCOUNT.dkr.ecr.$REGION.amazonaws.com/vgg19-image:latest

Create a model in SageMaker

import boto3
import sagemaker

sagemaker_client = boto3.client(service_name="sagemaker")
role = sagemaker.get_execution_role()

model_name = "<model-name>"

primary_container = {
    # URI of the inference image pushed to ECR above,
    # e.g. <account>.dkr.ecr.<region>.amazonaws.com/vgg19-image:latest
    "Image": "<inference-image-uri>",
    "ModelDataUrl": "s3://<model-path>.tar.gz"
}

create_model_response = sagemaker_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer=primary_container)

Create an endpoint configuration and an endpoint

endpoint_config_name = "endpoint-config-name"

sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[{
        "InstanceType": "ml.g5.xlarge",
        "InitialVariantWeight": 1,
        "InitialInstanceCount": 1,
        "ModelName": model_name,
        "VariantName": "AllTraffic"}])

Check whether the endpoint is InService

Once the endpoint status reaches InService, we can start invoking the endpoint to test it.

import time

while True:
    response = sagemaker_client.describe_endpoint(EndpointName='endpoint-name')
    status = response['EndpointStatus']
    print(f'Endpoint status: {status}')
    if status in ['InService', 'Failed']:
        break
    time.sleep(30)

Invoke the endpoint

import base64
import json
import boto3

# file_name is a local test image; endpoint_name is the endpoint created above.
# input_fn expects a JSON body with an 'image' key (URL or Base64 data),
# so the image is Base64-encoded and sent as application/json.
with open(file_name, "rb") as f:
    payload = json.dumps({"image": base64.b64encode(f.read()).decode("utf-8")})

sagemaker_runtime = boto3.client("runtime.sagemaker")
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=payload
)

# output_fn returns JSON containing the 'style' and 'content' outputs
result = json.loads(response["Body"].read())
print(result.keys())

Review and potential improvements:

In the inference.py above, the definitions of model_fn and predict_fn are fairly simple and not very flexible.

Below is a more complete model_fn template (taken from a different project) that sets up a pretrained model by loading a config file, model weights, joblib memory banks, and transforms, which can improve performance, efficiency, and usability in machine learning workflows.

import os

import joblib
import torch
from omegaconf import OmegaConf

# AiVadModel, get_transforms and InputNormalizationMethod are project-specific
# imports from the original codebase; their exact import paths depend on the
# library version used and are not shown here.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def model_fn(model_dir):
    """
    This function is the first to get executed upon a prediction request.
    It loads the model from disk and returns the model object, which will be used later for inference.
    """

    # Load the config file
    config = OmegaConf.load(os.path.join(model_dir, "ai_vad_config.yaml"))
    config_model = config.model

    # Load the model
    model = AiVadModel(
        box_score_thresh=config_model.box_score_thresh,
        persons_only=config_model.persons_only,
        min_bbox_area=config_model.min_bbox_area,
        max_bbox_overlap=config_model.max_bbox_overlap,
        enable_foreground_detections=config_model.enable_foreground_detections,
        foreground_kernel_size=config_model.foreground_kernel_size,
        foreground_binary_threshold=config_model.foreground_binary_threshold,
        n_velocity_bins=config_model.n_velocity_bins,
        use_velocity_features=config_model.use_velocity_features,
        use_pose_features=config_model.use_pose_features,
        use_deep_features=config_model.use_deep_features,
        n_components_velocity=config_model.n_components_velocity,
        n_neighbors_pose=config_model.n_neighbors_pose,
        n_neighbors_deep=config_model.n_neighbors_deep,
    )

    # Load the model weights
    model.load_state_dict(torch.load(os.path.join(model_dir, "ai_vad_weights.pth"), map_location=device), strict=False)

    # Load the memory banks
    velocity_estimator_memory_bank, pose_estimator_memory_bank, appearance_estimator_memory_bank = joblib.load(
        os.path.join(model_dir, "ai_vad_banks.joblib"))
    if velocity_estimator_memory_bank is not None:
        model.density_estimator.velocity_estimator.memory_bank = velocity_estimator_memory_bank
    if pose_estimator_memory_bank is not None:
        model.density_estimator.pose_estimator.memory_bank = pose_estimator_memory_bank
    if appearance_estimator_memory_bank is not None:
        model.density_estimator.appearance_estimator.memory_bank = appearance_estimator_memory_bank
    model.density_estimator.fit()

    # Move the entire model to the device
    model = model.to(device)

    # Get the transforms
    transform_config = config.dataset.transform_config.eval if "transform_config" in config.dataset.keys() else None
    image_size = (config.dataset.image_size[0], config.dataset.image_size[1])
    center_crop = config.dataset.get("center_crop")
    center_crop = tuple(center_crop) if center_crop is not None else None
    normalization = InputNormalizationMethod(config.dataset.normalization)
    transform = get_transforms(config=transform_config, image_size=image_size, center_crop=center_crop, normalization=normalization)

    return model, transform
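Since this model_fn returns a (model, transform) tuple, a matching predict_fn would unpack it and apply the transform before running the model. Here is a minimal, illustrative sketch under that assumption (the exact transform call and tensor handling depend on the project's transform pipeline):

def predict_fn(input_data, model_and_transform):
    """Unpack the objects returned by model_fn, preprocess, then run inference."""
    model, transform = model_and_transform

    # Apply the eval-time transform loaded in model_fn (assumed to return a tensor)
    batch = transform(image=input_data)["image"].unsqueeze(0).to(device)

    # Run the model without tracking gradients
    model.eval()
    with torch.no_grad():
        outputs = model(batch)
    return outputs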

Conclusion

We have finished:

• Writing the SageMaker model serving script (inference.py)
• Uploading the model to S3
• Uploading a custom Docker image to AWS ECR
• Creating a model in SageMaker
• Creating an endpoint configuration
• Creating an endpoint
• Invoking the endpoint

References

Deploy a custom ML model on AWS SageMaker

Configure and deploy models on SageMaker
http://paddyzz.github.io/projects/Config_Sagemaker/

Author: Paddy
Posted on: 31-07-2024
Updated on: 24-10-2024

Categories: projects