Commit 115e7a8

ChoiByungWook authored and Jonathan Esterhazy committed
add Elastic Inference support
1 parent 51e0305 · commit 115e7a8

28 files changed: +468 -109 lines

CHANGELOG.rst

Lines changed: 3 additions & 1 deletion
@@ -2,10 +2,12 @@
 CHANGELOG
 =========
 
+=======
 1.16.0.dev
-==========
+=======
 
 * feature: Estimators: Add RLEstimator to provide support for Reinforcement Learning.
+* feature: Add support for Amazon Elastic Inference
 
 1.15.2
 ======

src/sagemaker/chainer/model.py

Lines changed: 4 additions & 2 deletions
@@ -70,11 +70,13 @@ def __init__(self, model_data, role, entry_point, image=None, py_version='py3',
         self.framework_version = framework_version
         self.model_server_workers = model_server_workers
 
-    def prepare_container_def(self, instance_type):
+    def prepare_container_def(self, instance_type, accelerator_type=None):
         """Return a container definition with framework configuration set in model environment variables.
 
         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.
 
         Returns:
             dict[str, str]: A container definition object usable with the CreateModel API.
@@ -83,7 +85,7 @@ def prepare_container_def(self, instance_type):
         if not deploy_image:
             region_name = self.sagemaker_session.boto_session.region_name
             deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type,
-                                            self.framework_version, self.py_version)
+                                            self.framework_version, self.py_version, accelerator_type=accelerator_type)
 
         deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
         self._upload_code(deploy_key_prefix)
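
Note that Chainer itself is not one of the Elastic Inference frameworks whitelisted in fw_utils.py (see that file's change below), so the new keyword only matters when it is left at its default of None. A minimal sketch of what happens if an accelerator is requested anyway; the region, framework version, and instance type are illustrative, not taken from this commit:

    from sagemaker.fw_utils import create_image_uri

    # 'chainer' is not in VALID_EIA_FRAMEWORKS, so requesting an accelerator
    # raises ValueError instead of building a 'sagemaker-chainer-eia' image URI.
    try:
        create_image_uri('us-west-2', 'chainer', 'ml.m4.xlarge', '4.1.0',
                         py_version='py3', accelerator_type='ml.eia1.medium')
    except ValueError as e:
        print(e)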

src/sagemaker/estimator.py

Lines changed: 7 additions & 1 deletion
@@ -310,7 +310,8 @@ def attach(cls, training_job_name, sagemaker_session=None, model_channel_name='m
             estimator.latest_training_job.wait()
         return estimator
 
-    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, use_compiled_model=False, **kwargs):
+    def deploy(self, initial_instance_count, instance_type, accelerator_type=None, endpoint_name=None,
+               use_compiled_model=False, **kwargs):
         """Deploy the trained model to an Amazon SageMaker endpoint and return a ``sagemaker.RealTimePredictor`` object.
 
         More information:
@@ -320,6 +321,10 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, use_
             initial_instance_count (int): Minimum number of EC2 instances to deploy to an endpoint for prediction.
             instance_type (str): Type of EC2 instance to deploy to an endpoint for prediction,
                 for example, 'ml.c4.xlarge'.
+            accelerator_type (str): Type of Elastic Inference accelerator to attach to an endpoint for model loading
+                and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
+                will be attached to the endpoint.
+                For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
             endpoint_name (str): Name to use for creating an Amazon SageMaker endpoint. If not specified, the name of
                 the training job is used.
             use_compiled_model (bool): Flag to select whether to use compiled (optimized) model. Default: False.
@@ -345,6 +350,7 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, use_
         return model.deploy(
             instance_type=instance_type,
             initial_instance_count=initial_instance_count,
+            accelerator_type=accelerator_type,
             endpoint_name=endpoint_name)
 
     @property
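
The new accelerator_type argument is simply forwarded to model.deploy(), so a trained estimator can be placed behind an Elastic Inference accelerator in one call. A hedged sketch; the training job name and endpoint name are hypothetical:

    from sagemaker.mxnet import MXNet

    # Re-attach to a previously completed MXNet training job (name is hypothetical)
    # and deploy it with an EI accelerator attached to the endpoint.
    estimator = MXNet.attach('my-mxnet-training-job')
    predictor = estimator.deploy(initial_instance_count=1,
                                 instance_type='ml.m4.xlarge',
                                 accelerator_type='ml.eia1.medium',
                                 endpoint_name='my-ei-endpoint')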

src/sagemaker/fw_utils.py

Lines changed: 29 additions & 4 deletions
@@ -39,11 +39,12 @@
                                     'Please add framework_version={} to your constructor to avoid this error.'
 
 VALID_PY_VERSIONS = ['py2', 'py3']
+VALID_EIA_FRAMEWORKS = ['tensorflow', 'mxnet']
+VALID_ACCOUNTS_BY_REGION = {'us-gov-west-1': '246785580436'}
 
 
 def create_image_uri(region, framework, instance_type, framework_version, py_version=None,
-                     account='520713654638', optimized_families=None):
-
+                     account='520713654638', accelerator_type=None, optimized_families=None):
     """Return the ECR URI of an image.
 
     Args:
@@ -54,6 +55,7 @@ def create_image_uri(region, framework, instance_type, framework_version, py_ver
         py_version (str): Optional. Python version. If specified, should be one of 'py2' or 'py3'.
             If not specified, image uri will not include a python component.
         account (str): AWS account that contains the image. (default: '520713654638')
+        accelerator_type (str): SageMaker Elastic Inference accelerator type.
         optimized_families (str): Instance families for which there exist specific optimized images.
 
     Returns:
@@ -65,8 +67,7 @@ def create_image_uri(region, framework, instance_type, framework_version, py_ver
         raise ValueError('invalid py_version argument: {}'.format(py_version))
 
     # Handle Account Number for Gov Cloud
-    if region == 'us-gov-west-1':
-        account = '246785580436'
+    account = VALID_ACCOUNTS_BY_REGION.get(region, account)
 
     # Handle Local Mode
     if instance_type.startswith('local'):
@@ -90,10 +91,34 @@ def create_image_uri(region, framework, instance_type, framework_version, py_ver
         tag = "{}-{}-{}".format(framework_version, device_type, py_version)
     else:
         tag = "{}-{}".format(framework_version, device_type)
+
+    if _accelerator_type_valid_for_framework(framework=framework, accelerator_type=accelerator_type,
+                                             optimized_families=optimized_families):
+        framework += '-eia'
+
     return "{}.dkr.ecr.{}.amazonaws.com/sagemaker-{}:{}" \
         .format(account, region, framework, tag)
 
 
+def _accelerator_type_valid_for_framework(framework, accelerator_type=None, optimized_families=None):
+    if accelerator_type is None:
+        return False
+
+    if framework not in VALID_EIA_FRAMEWORKS:
+        raise ValueError('{} is not supported with Amazon Elastic Inference. Currently only '
+                         'TensorFlow and MXNet are supported for SageMaker.'.format(framework))
+
+    if optimized_families:
+        raise ValueError('Neo does not support Amazon Elastic Inference.')
+
+    if not accelerator_type.startswith('ml.eia') and not accelerator_type == 'local_sagemaker_notebook':
+        raise ValueError('{} is not a valid SageMaker Elastic Inference accelerator type. '
+                         'See: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html'
+                         .format(accelerator_type))
+
+    return True
+
+
 def validate_source_dir(script, directory):
     """Validate that the source directory exists and it contains the user script
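
When an accelerator type passes validation, the framework portion of the repository name gains an '-eia' suffix. A rough sketch of the expected result, assuming the default account and that the pre-existing logic maps the 'c4' instance family to a 'cpu' tag; the region and versions are illustrative:

    from sagemaker.fw_utils import create_image_uri

    uri = create_image_uri('us-west-2', 'mxnet', 'ml.c4.xlarge', '1.3.0',
                           py_version='py2', accelerator_type='ml.eia1.medium')
    # Expected, per the format string above:
    # 520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-eia:1.3.0-cpu-py2
    print(uri)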

src/sagemaker/local/entities.py

Lines changed: 4 additions & 0 deletions
@@ -370,6 +370,10 @@ def serve(self):
         instance_type = self.production_variant['InstanceType']
         instance_count = self.production_variant['InitialInstanceCount']
 
+        accelerator_type = self.production_variant.get('AcceleratorType')
+        if accelerator_type == 'local_sagemaker_notebook':
+            self.primary_container['Environment']['SAGEMAKER_INFERENCE_ACCELERATOR_PRESENT'] = 'true'
+
         self.create_time = datetime.datetime.now()
         self.container = _SageMakerContainer(instance_type, instance_count, image, self.local_session)
         self.container.serve(self.primary_container['ModelDataUrl'], self.primary_container['Environment'])
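
In local mode there is no real accelerator to attach; a production variant carrying the sentinel value 'local_sagemaker_notebook' just flips an environment variable so the serving container behaves as if an accelerator were present. A sketch of the data shapes involved; the variant fields mirror the CreateEndpointConfig API and the values are illustrative:

    # Illustrative production variant for a local-mode endpoint requesting
    # accelerator_type='local_sagemaker_notebook'.
    production_variant = {
        'VariantName': 'AllTraffic',
        'ModelName': 'my-local-model',  # hypothetical
        'InstanceType': 'local',
        'InitialInstanceCount': 1,
        'AcceleratorType': 'local_sagemaker_notebook',
    }

    container_env = {}
    if production_variant.get('AcceleratorType') == 'local_sagemaker_notebook':
        # Mirrors the change above: tell the serving container an EI
        # accelerator is available.
        container_env['SAGEMAKER_INFERENCE_ACCELERATOR_PRESENT'] = 'true'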

src/sagemaker/model.py

Lines changed: 14 additions & 5 deletions
@@ -67,14 +67,16 @@ def __init__(self, model_data, image, role=None, predictor_cls=None, env=None, n
         self._model_name = None
         self._is_compiled_model = False
 
-    def prepare_container_def(self, instance_type):  # pylint: disable=unused-argument
+    def prepare_container_def(self, instance_type, accelerator_type=None):  # pylint: disable=unused-argument
         """Return a dict created by ``sagemaker.container_def()`` for deploying this model to a specified instance type.
 
         Subclasses can override this to provide custom container definitions for
         deployment to a specific instance type. Called by ``deploy()``.
 
         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.
 
         Returns:
             dict: A container definition object usable with the CreateModel API.
@@ -168,7 +170,7 @@ def compile(self, target_instance_family, input_shape, output_path, role,
         self._is_compiled_model = True
         return self
 
-    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
+    def deploy(self, initial_instance_count, instance_type, accelerator_type=None, endpoint_name=None, tags=None):
         """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.
 
         Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``.
@@ -184,6 +186,10 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
             initial_instance_count (int): The initial number of instances to run in the
                 ``Endpoint`` created from this ``Model``.
+            accelerator_type (str): Type of Elastic Inference accelerator to deploy this model for model loading
+                and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
+                will be attached to the endpoint.
+                For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
             endpoint_name (str): The name of the endpoint to create (default: None).
                 If not specified, a unique endpoint name will be created.
             tags(List[dict[str, str]]): The list of tags to attach to this specific endpoint.
@@ -199,14 +205,15 @@ def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags
             self.sagemaker_session = session.Session()
 
         compiled_model_suffix = '-'.join(instance_type.split('.')[:-1])
-        container_def = self.prepare_container_def(instance_type)
+        container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
         self.name = self.name or utils.name_from_image(container_def['Image'])
         if self.role is None:
             raise ValueError("Role can not be null for deploying a model")
         if self._is_compiled_model:
             self.name += compiled_model_suffix
         self.sagemaker_session.create_model(self.name, self.role, container_def, vpc_config=self.vpc_config)
-        production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count)
+        production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count,
+                                                          accelerator_type=accelerator_type)
         if endpoint_name:
             self.endpoint_name = endpoint_name
         else:
@@ -294,13 +301,15 @@ def __init__(self, model_data, image, role, entry_point, source_dir=None, predic
         self.bucket, self.key_prefix = None, None
         self.uploaded_code = None
 
-    def prepare_container_def(self, instance_type):  # pylint disable=unused-argument
+    def prepare_container_def(self, instance_type, accelerator_type=None):  # pylint disable=unused-argument
         """Return a container definition with framework configuration set in model environment variables.
 
         This also uploads user-supplied code to S3.
 
         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.
 
         Returns:
             dict[str, str]: A container definition object usable with the CreateModel API.
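
Model.deploy() now threads accelerator_type into both prepare_container_def() (so framework subclasses can select an EI-enabled image) and sagemaker.production_variant() (so the endpoint config carries AcceleratorType). A hedged end-to-end sketch with the generic Model class; the image URI, artifact path, and role below are placeholders, not part of this commit:

    import sagemaker
    from sagemaker.model import Model

    model = Model(model_data='s3://my-bucket/model.tar.gz',  # placeholder
                  image='123456789012.dkr.ecr.us-west-2.amazonaws.com/my-image:latest',  # placeholder
                  role='MySageMakerRole',  # placeholder
                  sagemaker_session=sagemaker.Session())

    # The accelerator is attached to the endpoint via the ProductionVariant;
    # for the base Model class the container definition ignores accelerator_type.
    model.deploy(initial_instance_count=1,
                 instance_type='ml.m4.xlarge',
                 accelerator_type='ml.eia1.medium')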

src/sagemaker/mxnet/model.py

Lines changed: 4 additions & 2 deletions
@@ -70,11 +70,13 @@ def __init__(self, model_data, role, entry_point, image=None, py_version='py2',
         self.framework_version = framework_version
         self.model_server_workers = model_server_workers
 
-    def prepare_container_def(self, instance_type):
+    def prepare_container_def(self, instance_type, accelerator_type=None):
         """Return a container definition with framework configuration set in model environment variables.
 
         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.
 
         Returns:
             dict[str, str]: A container definition object usable with the CreateModel API.
@@ -83,7 +85,7 @@ def prepare_container_def(self, instance_type):
         if not deploy_image:
             region_name = self.sagemaker_session.boto_session.region_name
             deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type,
-                                            self.framework_version, self.py_version)
+                                            self.framework_version, self.py_version, accelerator_type=accelerator_type)
 
         deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
         self._upload_code(deploy_key_prefix)
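
For MXNet this change is end to end: with an accelerator_type set, prepare_container_def() builds a 'sagemaker-mxnet-eia' image URI and deploy() attaches the accelerator to the endpoint. A hedged sketch; the artifact path, role, and entry point are hypothetical:

    from sagemaker.mxnet import MXNetModel

    model = MXNetModel(model_data='s3://my-bucket/mxnet/model.tar.gz',  # hypothetical
                       role='MySageMakerRole',  # hypothetical
                       entry_point='inference.py',  # hypothetical
                       framework_version='1.3.0')

    # 'mxnet' is in VALID_EIA_FRAMEWORKS, so the '-eia' image variant is used.
    predictor = model.deploy(initial_instance_count=1,
                             instance_type='ml.m4.xlarge',
                             accelerator_type='ml.eia1.medium')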

src/sagemaker/pytorch/model.py

Lines changed: 4 additions & 2 deletions
@@ -69,11 +69,13 @@ def __init__(self, model_data, role, entry_point, image=None, py_version=PYTHON_
         self.framework_version = framework_version
         self.model_server_workers = model_server_workers
 
-    def prepare_container_def(self, instance_type):
+    def prepare_container_def(self, instance_type, accelerator_type=None):
         """Return a container definition with framework configuration set in model environment variables.
 
         Args:
             instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and
+                making inferences to the model. For example, 'ml.eia1.medium'.
 
         Returns:
             dict[str, str]: A container definition object usable with the CreateModel API.
@@ -82,7 +84,7 @@ def prepare_container_def(self, instance_type):
         if not deploy_image:
             region_name = self.sagemaker_session.boto_session.region_name
             deploy_image = create_image_uri(region_name, self.__framework_name__, instance_type,
-                                            self.framework_version, self.py_version)
+                                            self.framework_version, self.py_version, accelerator_type=accelerator_type)
         deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
         self._upload_code(deploy_key_prefix)
         deploy_env = dict(self.env)