to configure or manage the underlying infrastructure. After you train a model, you can deploy it to a SageMaker
Serverless endpoint and then invoke the endpoint with the model to get inference results back. More information about
SageMaker Serverless Inference can be found in the `AWS documentation <https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html>`__.

To use SageMaker Serverless Inference with a SageMaker-provided container or a Bring Your Own Container (BYOC)
model, you need to pass ``image_uri``. An example of using ``image_uri`` to create an MXNet model:

.. code:: python

    from sagemaker.mxnet import MXNetModel
    import sagemaker

    role = sagemaker.get_execution_role()

    # Create an MXNet Model object
    mxnet_model = MXNetModel(
        model_data="s3://my_bucket/pretrained_model/model.tar.gz",  # path to your trained SageMaker model
        role=role,  # IAM role with permissions to create an endpoint
        entry_point="inference.py",
        image_uri="763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.4.1-cpu-py3",  # the inference image to use
    )

For more Amazon SageMaker-provided algorithm and container image paths, see `Amazon SageMaker provided
algorithms and Deep Learning Containers <https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html>`_.
After creating a model with ``image_uri``, you can follow the steps below to create a serverless endpoint.

To deploy a serverless endpoint, you need to create a ``ServerlessInferenceConfig``.
If you create a ``ServerlessInferenceConfig`` without specifying its arguments, the default ``MemorySizeInMB`` will be **2048** and
the default ``MaxConcurrency`` will be **5**:
Then use the ``ServerlessInferenceConfig`` in the estimator's ``deploy()`` method:

.. code:: python

    # Deploys the model that was generated by fit() to a SageMaker serverless endpoint
    serverless_predictor = estimator.deploy(serverless_inference_config=serverless_config)

Alternatively, deploy a serverless endpoint directly with the model's ``deploy()`` method:

.. code:: python

    # Deploys the model to a SageMaker serverless endpoint
    serverless_predictor = model.deploy(serverless_inference_config=serverless_config)

After deployment is complete, you can use the predictor's ``predict()`` method to invoke the serverless endpoint just like
a real-time endpoint:
