diff --git a/examples/tf_vision/README.md b/examples/tf_vision/README.md new file mode 100644 index 000000000..a3178a0be --- /dev/null +++ b/examples/tf_vision/README.md @@ -0,0 +1,111 @@ +# TensorFlow Saved Model Inference Service + +In this example, we show how to use a pre-trained TensorFlow MobileNet V2 model in the saved model format for performing real-time inference using MMS. + +# Objective + +1. Demonstrate how to package a pre-trained TensorFlow saved model in MMS +2. Demonstrate how to create a custom service with pre-processing and post-processing + +# Pre-requisite +Install TensorFlow + +``` +pip install tensorflow==1.15 +``` + +## Step 1 - Download the pre-trained MobileNet V2 Model + +You will need the model files to use for the export. Check this example's directory in case they're already downloaded. Otherwise, you can `curl` the files or download them via your browser: + +```bash +cd multi-model-server/examples/tf_vision + +curl -O http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz +tar -xvf ssd_mobilenet_v1_coco_2017_11_17.tar.gz +cp ssd_mobilenet_v1_coco_2017_11_17/saved_model/saved_model.pb . +``` + + +## Step 2 - Prepare the signature file + +Define the model input name and shape in the `signature.json` file. The signature for this example looks like this: + +```json +{ + "inputs": [ + { + "data_precision": "UINT8", + "data_name": "inputs", + "data_shape": [ + 1, + 224, + 224, + 3 + ] + } + ] +} +``` + +## Step 3 - Create custom service class + +We provide custom service class template code in this folder: +1. [model_handler.py](./model_handler.py) - A generic base service class. +2. [tensorflow_saved_model_service.py](./tensorflow_saved_model_service.py) - A TensorFlow saved model base service class. +3. [tensorflow_vision_service.py](./tensorflow_vision_service.py) - A TensorFlow vision service class. +4. 
[image.py](./image.py) - Utils for reshaping + +In this example, you can simply use the provided tensorflow_vision_service.py as the model archive entry point. + +## Step 4 - Package the model with `model-archiver` CLI utility + +In this step, we package the following: +1. The pre-trained TensorFlow Saved Model we downloaded in Step 1. +2. The signature.json file we prepared in Step 2. +3. The custom model service files we mentioned in Step 3. + +We use the `model-archiver` command line utility (CLI) provided by MMS. +Install `model-archiver` if you have not already: + +```bash +pip install model-archiver +``` + +This tool creates a .mar file that will be provided to MMS for serving inference requests. In the following command line, we specify 'tensorflow_vision_service:handle' as the model archive entry point. + +```bash +cd multi-model-server/examples +model-archiver --model-name mobilenetv2 --model-path tf_vision --handler tensorflow_vision_service:handle +``` + +## Step 5 - Start the Inference Service + +Start the inference service by providing the 'mobilenetv2.mar' file we created in Step 4. + +MMS then extracts the resources (signature, saved model) we have packaged into the .mar file and uses the extended custom service to start the inference server. + +By default, the server is started on the localhost at port 8080. + +```bash +cd multi-model-server +multi-model-server --start --model-store examples --models ssd=mobilenetv2.mar +``` + +Awesome! We have successfully exported a pre-trained TF saved model, extended MMS with custom preprocess/postprocess and started an inference service. + +**Note**: In this example, MMS loads the .mar file from the local file system. However, you can also store the archive (.mar file) over a network-accessible storage such as AWS S3, and use a URL such as http:// or https:// to indicate the model archive location. MMS is capable of loading the model archive over such URLs as well. 
# ---------------------------------------------------------------------------
# examples/tf_vision/README.md (final section of the README added by this patch)
# ---------------------------------------------------------------------------
#
# ## Step 6 - Test sample inference
#
# Let us try the inference server we just started. Open another terminal on the same host. Download a sample image, or try any jpeg.
#
# You can also use this image of three dogs on a beach.
# ![3 dogs on beach](../../docs/images/3dogs.jpg)
#
# Use curl to make a prediction call by passing the downloaded image as input to the prediction request.
#
# ```bash
# cd multi-model-server
# curl -X POST http://127.0.0.1:8080/predictions/ssd -T docs/images/3dogs.jpg
# ```

# ---------------------------------------------------------------------------
# examples/tf_vision/image.py
# ---------------------------------------------------------------------------
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""
Image utils
"""
from io import BytesIO

import numpy as np

# NOTE: OpenCV (cv2) and Pillow (PIL) are imported inside the functions that
# need them, so the pure-numpy helper below stays importable without them.

# Dimension orders accepted by transform_shape().
_VALID_DIM_ORDERS = ('NCHW', 'NHWC')


def transform_shape(img_arr, dim_order='NHWC'):
    """
    Rearrange image numpy array shape to 'NCHW' or 'NHWC' which
    is valid for TF model input.
    Input image array should have dim_order of 'HWC'.

    :param img_arr: numpy array
        Image in numpy format with shape (height, width, channel)
    :param dim_order: str
        Output image dimension order. Valid values are 'NCHW' and 'NHWC'

    :return: numpy array
        Image in numpy array format with dim_order shape

    :raises ValueError: if dim_order is not exactly 'NCHW' or 'NHWC'
    """
    # Exact membership test: a substring test such as ``dim_order in 'NCHW'``
    # would also accept 'N', 'CH' or the empty string.
    if dim_order not in _VALID_DIM_ORDERS:
        raise ValueError("dim_order must be 'NCHW' or 'NHWC'.")
    if dim_order == 'NCHW':
        # HWC -> CHW
        img_arr = np.transpose(img_arr, (2, 0, 1))
    # Prepend the batch axis (N).
    return np.expand_dims(img_arr, axis=0)


def read(buf):
    """
    Read and decode an image to a numpy array.

    :param buf: image bytes
        Binary image data as bytes.
    :return: numpy array
        A numpy array containing the image, as produced by
        ``numpy.array`` on the decoded PIL image.
    """
    from PIL import Image

    return np.array(Image.open(BytesIO(buf)))


def resize(src, new_width, new_height, interp=2):
    """
    Resizes image to new_width and new_height.
    Input image numpy array should have dim_order of 'HWC'.

    :param src: numpy array
        Source image in numpy array format
    :param new_width: int
        Width in pixel for resized image
    :param new_height: int
        Height in pixel for resized image
    :param interp: int
        interpolation flag forwarded to ``cv2.resize``

    :return: numpy array
        An numpy array containing the resized image.
    """
    import cv2

    # cv2.resize takes dsize as (width, height); passing (height, width)
    # would transpose the target size for non-square outputs.
    return cv2.resize(src, dsize=(new_width, new_height), interpolation=interp)


# ---------------------------------------------------------------------------
# examples/tf_vision/model_handler.py
# ---------------------------------------------------------------------------
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""
ModelHandler defines a base model handler.
"""
import logging
import time


class ModelHandler(object):
    """
    A base Model handler implementation.

    ``handle`` runs ``preprocess`` -> ``inference`` -> ``postprocess`` and
    reports the time spent in each stage through ``context.metrics``.
    """

    def __init__(self):
        self.error = None          # error recorded for the current request, if any
        self._context = None       # context passed to initialize()
        self._batch_size = 0       # read from system properties in initialize()
        self.initialized = False

    def initialize(self, context):
        """
        Initialize model. This will be called during model loading time

        :param context: Initial context contains model server system properties.
        :return:
        """
        self._context = context
        self._batch_size = context.system_properties["batch_size"]
        self.initialized = True

    def preprocess(self, batch):
        """
        Transform raw input into model input data.

        :param batch: list of raw requests, should match batch size
        :return: list of preprocessed model input data
        :raises ValueError: if len(batch) differs from the configured batch size
        """
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if self._batch_size != len(batch):
            raise ValueError("Invalid input batch size: {}".format(len(batch)))
        return None

    def inference(self, model_input):
        """
        Internal inference methods

        :param model_input: transformed model input data
        :return: list of inference output in NDArray
        """
        return None

    def postprocess(self, inference_output):
        """
        Return predict result in batch.

        :param inference_output: list of inference output
        :return: list of predict results
        """
        return ["OK"] * self._batch_size

    def handle(self, data, context):
        """
        Custom service entry point function.

        :param data: list of objects, raw input from request
        :param context: model server context
        :return: list of outputs to be send back to client
        """
        self.error = None  # reset earlier errors

        try:
            preprocess_start = time.time()
            data = self.preprocess(data)
            inference_start = time.time()
            data = self.inference(data)
            postprocess_start = time.time()
            data = self.postprocess(data)
            end_time = time.time()

            # Stage durations are reported in milliseconds, rounded to 2 decimals.
            metrics = context.metrics
            metrics.add_time("PreprocessTime", round((inference_start - preprocess_start) * 1000, 2))
            metrics.add_time("InferenceTime", round((postprocess_start - inference_start) * 1000, 2))
            metrics.add_time("PostprocessTime", round((end_time - postprocess_start) * 1000, 2))

            return data
        except Exception as e:
            # Top-level boundary: log with traceback, report a 500 status and
            # answer every request in the batch with the error text.
            logging.error(e, exc_info=True)
            request_processor = context.request_processor
            request_processor.report_status(500, "Unknown inference error")
            return [str(e)] * self._batch_size


# ---------------------------------------------------------------------------
# examples/tf_vision/tensorflow_saved_model_service.py
# ---------------------------------------------------------------------------
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""
TensorflowSavedModelService defines an API for running a tensorflow saved model
"""
import json
import os

import tensorflow as tf

from model_handler import ModelHandler


class TensorflowSavedModelService(ModelHandler):
    """
    TensorflowSavedModelService defines the fundamental loading model and inference
    operations when serving a TF saved model. This is a base class and needs to be
    inherited.
    """

    def __init__(self):
        super(TensorflowSavedModelService, self).__init__()
        self.predictor = None   # callable built by tf.contrib.predictor in initialize()
        self.labels = None
        self.signature = None   # parsed content of signature.json
        self.epoch = 0

    # noinspection PyMethodMayBeStatic
    def get_model_files_prefix(self, context):
        """
        Return the model name recorded in the model archive manifest.

        :param context: model server context exposing ``manifest``
        :return: value of ``manifest["model"]["modelName"]``
        """
        return context.manifest["model"]["modelName"]

    def initialize(self, context):
        """
        Initialize model. This will be called during model loading time

        Reads ``signature.json`` from the model directory and builds the
        predictor from the saved model stored in the same directory.

        :param context: Initial context contains model server system properties.
        :return:
        :raises RuntimeError: if ``signature.json`` is missing from the model directory
        """
        super(TensorflowSavedModelService, self).initialize(context)
        # The base class marks the handler as initialized; keep it "not ready"
        # until the signature and the predictor have actually been loaded, so a
        # failed load is not mistaken for a usable service.
        self.initialized = False

        model_dir = context.system_properties.get("model_dir")

        signature_file_path = os.path.join(model_dir, "signature.json")
        if not os.path.isfile(signature_file_path):
            raise RuntimeError("Missing signature.json file.")

        with open(signature_file_path) as f:
            self.signature = json.load(f)

        # The signature is kept exactly as written in signature.json: a 0 entry
        # in "data_shape" means "any size" to check_input_shape(), so it must
        # not be rewritten here.

        self.predictor = tf.contrib.predictor.from_saved_model(model_dir)
        self.initialized = True

    def inference(self, model_input):
        """
        Internal inference methods for TF - saved model. Run forward computation and
        return output.

        :param model_input: list of dict of {name : numpy_array}
            Batch of preprocessed inputs in tensor dict.
        :return: list of dict of {name: numpy_array}
            Batch of inference output tensor dict
        """
        # A preprocessing error was already recorded; skip the forward pass.
        if self.error is not None:
            return None

        # Check input shape
        check_input_shape(model_input, self.signature)

        # Restricting to one request which contains the whole batch.
        # Remove this restriction if adding custom batching support.
        return self.predictor(model_input[0])


def check_input_shape(inputs, signature):
    """
    Check input data shape consistency with signature.

    The first (batch) dimension is never compared, and a 0 in the signature's
    "data_shape" matches any size for that dimension.

    Parameters
    ----------
    inputs : List of dicts
        Input data in this format [{input_name: input_tensor, input2_name: input2_tensor}, {...}]
    signature : dict
        Dictionary containing model signature.

    Raises
    ------
    TypeError
        If ``inputs`` is not a list of dicts.
    ValueError
        If the number of inputs, an input name, the rank or a dimension does
        not match the signature.
    """
    if not isinstance(inputs, list):
        raise TypeError('Input data must be a list.')

    sig_inputs = signature["inputs"]
    for input_dict in inputs:
        if not isinstance(input_dict, dict):
            raise TypeError('Each request must be dict of input_name: input_tensor.')
        if len(input_dict) != len(sig_inputs):
            raise ValueError("Input number mismatches with "
                             "signature. %d expected but got %d."
                             % (len(sig_inputs), len(input_dict)))

        for sig_input in sig_inputs:
            name = sig_input['data_name']
            expected = sig_input['data_shape']
            # Look the tensor up by its signature name rather than relying on
            # dict iteration order lining up with the signature order.
            if name not in input_dict:
                raise ValueError('Input %s is missing from the request.' % name)
            actual = input_dict[name].shape

            if len(actual) != len(expected):
                raise ValueError('Shape dimension of input %s mismatches with '
                                 'signature. %d expected but got %d.'
                                 % (name, len(expected), len(actual)))

            # Skip index 0 (batch dimension); 0 in the signature is a wildcard.
            for idx in range(1, len(expected)):
                if expected[idx] != 0 and expected[idx] != actual[idx]:
                    raise ValueError('Input %s has different shape with '
                                     'signature. %s expected but got %s.'
                                     % (name, expected, actual))


# ---------------------------------------------------------------------------
# examples/tf_vision/tensorflow_vision_service.py
# ---------------------------------------------------------------------------
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
# http://www.apache.org/licenses/LICENSE-2.0
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""
TensorflowVisionService defines a TF based vision service
"""
import logging

from tensorflow_saved_model_service import TensorflowSavedModelService
import image


class TensorflowVisionService(TensorflowSavedModelService):
    """
    TensorflowVisionService defines a fundamental service for image classification task.
    In preprocess, input image buffer is read to numpy and resized respect to input
    shape in signature.
    In post process, raw tensors are returned.
    """

    def preprocess(self, request):
        """
        Decode all input images into numpy array.

        Note: This implementation doesn't properly handle error cases in batch mode,
        If one of the input images is corrupted, all requests in the batch will fail.

        :param request: list of request dicts; the image bytes are looked up
            under the signature's input name, then "body", then "data"
        :return: list of ``{input_name: image_array}`` dicts, or None when an
            input is empty or cannot be decoded (``self.error`` is set)
        """
        signature_input = self.signature['inputs'][0]
        param_name = signature_input['data_name']
        # We are assuming input shape is NHWC
        height, width = signature_input['data_shape'][1:3]

        img_list = []
        for data in request:
            img = data.get(param_name)
            if img is None:
                img = data.get("body")

            if img is None:
                img = data.get("data")

            if img is None or len(img) == 0:
                self.error = "Empty image input"
                return None

            try:
                img_arr = image.read(img)
            except Exception as e:
                # Decoding failures are reported to the client via self.error.
                logging.warning(e, exc_info=True)
                self.error = "Corrupted image input"
                return None

            img_arr = image.resize(img_arr, width, height)
            img_arr = image.transform_shape(img_arr)
            # One tensor dict per request: [{name: image}]
            img_list.append({param_name: img_arr})

        return img_list

    def postprocess(self, data):
        """
        Convert the inference output into the response sent to the client.

        :param data: dict of inference outputs keyed by output name
        :return: single-element list holding a dict with every output value
            converted with ``str``, or the recorded error repeated once per
            request in the batch
        """
        if self.error is not None:
            return [self.error] * self._batch_size

        # Build a new dict instead of rewriting the inference output in place.
        return [{key: str(value) for key, value in data.items()}]


# Single service instance shared by every call to handle().
_service = TensorflowVisionService()


def handle(data, context):
    """
    Model archive entry point: lazily initialize the service, then run it.

    :param data: list of raw requests, or None
    :param context: model server context
    :return: None when ``data`` is None, otherwise the service's batch output
    """
    if not _service.initialized:
        _service.initialize(context)

    if data is None:
        return None

    return _service.handle(data, context)