aws
diff --git a/‎setup.py
Lines changed: 1 addition & 1 deletion b/‎setup.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/fw_registry.py
Lines changed: 6 additions & 0 deletions b/‎src/sagemaker/fw_registry.py
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/sagemaker/fw_utils.py
Lines changed: 1 addition & 1 deletion b/‎src/sagemaker/fw_utils.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/sklearn/README.rst
Lines changed: 652 additions & 0 deletions b/‎src/sagemaker/sklearn/README.rst
Lines changed: 652 additions & 0 deletions
diff --git a/‎src/sagemaker/sklearn/__init__.py
Lines changed: 18 additions & 0 deletions b/‎src/sagemaker/sklearn/__init__.py
Lines changed: 18 additions & 0 deletions
diff --git a/‎src/sagemaker/sklearn/defaults.py
Lines changed: 15 additions & 0 deletions b/‎src/sagemaker/sklearn/defaults.py
Lines changed: 15 additions & 0 deletions
diff --git a/‎src/sagemaker/sklearn/estimator.py
Lines changed: 159 additions & 0 deletions b/‎src/sagemaker/sklearn/estimator.py
Lines changed: 159 additions & 0 deletions
diff --git a/‎src/sagemaker/sklearn/model.py
Lines changed: 96 additions & 0 deletions b/‎src/sagemaker/sklearn/model.py
Lines changed: 96 additions & 0 deletions
@@ -36,7 +36,7 @@ def read(fname):
       description="Open source library for training and deploying models on Amazon SageMaker.",
       packages=find_packages('src'),
       package_dir={'': 'src'},
-      py_modules=[os.splitext(os.basename(path))[0] for path in glob('src/*.py')],
+      py_modules=[os.path.splitext(os.path.basename(path))[0] for path in glob('src/*.py')],
       long_description=read('README.rst'),
       author="Amazon Web Services",
       url='https://github.com/aws/sagemaker-python-sdk/',
 
@@ -84,3 +84,9 @@ def registry(region_name, framework=None):
     except KeyError:
         logging.error("The specific image or region does not exist")
         raise
+
+
+def default_framework_uri(framework, region_name, image_tag):
+    """Build the default image URI for a SageMaker framework container.
+
+    Args:
+        framework (str): Framework name; the repository is named ``sagemaker-<framework>``.
+        region_name (str): AWS region, passed to :func:`registry` together with ``framework``.
+        image_tag (str): Tag appended after the repository name.
+
+    Returns:
+        str: ``<registry>/sagemaker-<framework>:<image_tag>``, where ``<registry>`` is the value
+            returned by ``registry(region_name, framework)``.
+    """
+    registry_host = registry(region_name, framework)
+    repository = "sagemaker-{}".format(framework)
+    return "{}/{}:{}".format(registry_host, repository, image_tag)
@@ -193,7 +193,7 @@ def framework_name_from_image(image_name):
     else:
         # extract framework, python version and image tag
         # We must support both the legacy and current image name format.
-        name_pattern = re.compile('^sagemaker-(tensorflow|mxnet|chainer|pytorch):(.*?)-(.*?)-(py2|py3)$')
+        name_pattern = re.compile('^sagemaker-(tensorflow|mxnet|chainer|pytorch|scikit-learn):(.*?)-(.*?)-(py2|py3)$')
         legacy_name_pattern = re.compile('^sagemaker-(tensorflow|mxnet)-(py2|py3)-(cpu|gpu):(.*)$')
         name_match = name_pattern.match(sagemaker_match.group(8))
         legacy_match = legacy_name_pattern.match(sagemaker_match.group(8))
 
@@ -0,0 +1,18 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+from sagemaker.sklearn.estimator import SKLearn
+from sagemaker.sklearn.model import SKLearnModel, SKLearnPredictor
+
+# ``__all__`` must contain the exported names as strings; listing the objects themselves makes
+# ``from sagemaker.sklearn import *`` fail with a TypeError.
+__all__ = ['SKLearn', 'SKLearnModel', 'SKLearnPredictor']
@@ -0,0 +1,15 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+SKLEARN_VERSION = '0.20.0'  # Default Scikit-learn framework version used when the caller supplies none.
@@ -0,0 +1,159 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import logging
+
+from sagemaker.estimator import Framework
+from sagemaker.fw_registry import default_framework_uri
+from sagemaker.fw_utils import framework_name_from_image, empty_framework_version_warning
+from sagemaker.sklearn.defaults import SKLEARN_VERSION
+from sagemaker.sklearn.model import SKLearnModel
+from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
+
+logging.basicConfig()  # Runs at import time; configures the root logger only if it has no handlers yet.
+logger = logging.getLogger('sagemaker')  # Logger used for the warnings emitted by this module.
+
+
+class SKLearn(Framework):
+    """Handle end-to-end training and deployment of custom Scikit-learn code."""
+
+    __framework_name__ = "scikit-learn"
+
+    def __init__(self, entry_point, framework_version=SKLEARN_VERSION, source_dir=None, hyperparameters=None,
+                 py_version='py3', image_name=None, **kwargs):
+        """
+        This ``Estimator`` executes a Scikit-learn script in a managed Scikit-learn execution environment, within a
+        SageMaker Training Job. The managed Scikit-learn environment is an Amazon-built Docker container that executes
+        functions defined in the supplied ``entry_point`` Python script.
+
+        Training is started by calling :meth:`~sagemaker.estimator.Framework.fit` on this Estimator.
+        After training is complete, calling :meth:`~sagemaker.estimator.Framework.deploy` creates a
+        hosted SageMaker endpoint and returns an :class:`~sagemaker.sklearn.model.SKLearnPredictor` instance
+        that can be used to perform inference against the hosted model.
+
+        Technical documentation on preparing Scikit-learn scripts for SageMaker training and using the Scikit-learn
+        Estimator is available on the project home-page: https://github.com/aws/sagemaker-python-sdk
+
+        Args:
+            entry_point (str): Path (absolute or relative) to the Python source file which should be executed
+                as the entry point to training. This should be compatible with either Python 2.7 or Python 3.5.
+            framework_version (str): Scikit-learn version you want to use for executing your model training code.
+                If ``None``, a warning is logged and ``SKLEARN_VERSION`` is used.
+                List of supported versions https://github.com/aws/sagemaker-python-sdk#sklearn-sagemaker-estimators
+            source_dir (str): Path (absolute or relative) to a directory with any other training
+                source code dependencies aside from the entry point file (default: None). Structure within this
+                directory is preserved when training on Amazon SageMaker.
+            hyperparameters (dict): Hyperparameters that will be used for training (default: None).
+                The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
+                For convenience, this accepts other types for keys and values, but ``str()`` will be called
+                to convert them before training.
+            py_version (str): Python version you want to use for executing your model training code (default: 'py3').
+                              One of 'py2' or 'py3'.
+            image_name (str): If specified, the estimator will use this image for training and hosting, instead of
+                selecting the appropriate SageMaker official image based on framework_version and py_version. It can
+                be an ECR url or dockerhub image and tag.
+                Examples:
+                    123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0
+                    custom-image:latest.
+            **kwargs: Additional kwargs passed to the :class:`~sagemaker.estimator.Framework` constructor.
+                ``train_instance_count`` is always forwarded as 1.
+
+        Raises:
+            ValueError: If ``train_instance_type`` is a GPU instance type.
+            AttributeError: If ``train_instance_count`` is given and is not 1.
+        """
+        # SciKit-Learn does not support distributed training or training on GPU instance types. Fail fast.
+        _validate_not_gpu_instance_type(kwargs.get('train_instance_type'))
+
+        train_instance_count = kwargs.get('train_instance_count')
+        if train_instance_count and train_instance_count != 1:
+            raise AttributeError("SciKit-Learn does not support distributed training. "
+                                 "Please remove the 'train_instance_count' argument or set "
+                                 "'train_instance_count=1' when initializing SKLearn.")
+        super(SKLearn, self).__init__(entry_point, source_dir, hyperparameters, image_name=image_name,
+                                      **dict(kwargs, train_instance_count=1))
+
+        self.py_version = py_version
+
+        if framework_version is None:
+            logger.warning(empty_framework_version_warning(SKLEARN_VERSION, SKLEARN_VERSION))
+        self.framework_version = framework_version or SKLEARN_VERSION
+
+        if image_name is None:
+            # Build the tag from the resolved version: the raw argument may be None, which would
+            # otherwise produce a "None-cpu-<py_version>" tag.
+            image_tag = "{}-{}-{}".format(self.framework_version, "cpu", py_version)
+            self.image_name = default_framework_uri(
+                SKLearn.__framework_name__,
+                self.sagemaker_session.boto_region_name,
+                image_tag)
+
+    def create_model(self, model_server_workers=None, role=None,
+                     vpc_config_override=VPC_CONFIG_DEFAULT, **kwargs):
+        """Create a SageMaker ``SKLearnModel`` object that can be deployed to an ``Endpoint``.
+
+        Args:
+            model_server_workers (int): Optional. The number of worker processes used by the inference server.
+                If None, server will use one worker per vCPU.
+            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, which is also used during
+                transform jobs. If not specified, the role from the Estimator will be used.
+            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on the model.
+                Default: use subnets and security groups from this Estimator.
+                * 'Subnets' (list[str]): List of subnet ids.
+                * 'SecurityGroupIds' (list[str]): List of security group ids.
+            **kwargs: Passed to initialization of ``SKLearnModel``.
+
+        Returns:
+            sagemaker.sklearn.model.SKLearnModel: A SageMaker ``SKLearnModel`` object.
+                See :func:`~sagemaker.sklearn.model.SKLearnModel` for full details.
+        """
+        role = role or self.role
+        return SKLearnModel(self.model_data, role, self.entry_point, source_dir=self._model_source_dir(),
+                            enable_cloudwatch_metrics=self.enable_cloudwatch_metrics, name=self._current_job_name,
+                            container_log_level=self.container_log_level, code_location=self.code_location,
+                            py_version=self.py_version, framework_version=self.framework_version,
+                            model_server_workers=model_server_workers, image=self.image_name,
+                            sagemaker_session=self.sagemaker_session,
+                            vpc_config=self.get_vpc_config(vpc_config_override),
+                            **kwargs)
+
+    @classmethod
+    def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None):
+        """Convert the job description to init params that can be handled by the class constructor
+
+        Args:
+            job_details: the returned job details from a describe_training_job API call.
+            model_channel_name (str): Forwarded unchanged to the parent implementation (default: None).
+
+        Returns:
+             dictionary: The transformed init_params
+
+        Raises:
+            ValueError: If the training image belongs to a different framework than this class.
+        """
+        # Forward model_channel_name instead of silently dropping it.
+        # NOTE(review): assumes the parent classmethod accepts it as its second argument - confirm.
+        init_params = super(SKLearn, cls)._prepare_init_params_from_job_description(job_details,
+                                                                                    model_channel_name)
+
+        image_name = init_params.pop('image')
+        framework, py_version, _ = framework_name_from_image(image_name)
+        init_params['py_version'] = py_version
+
+        if framework and framework != cls.__framework_name__:
+            training_job_name = init_params['base_job_name']
+            raise ValueError("Training job: {} didn't use image for requested framework".format(training_job_name))
+        elif not framework:
+            # If we were unable to parse the framework name from the image it is not one of our
+            # officially supported images, in this case just add the image to the init params.
+            init_params['image_name'] = image_name
+        return init_params
+
+
+def _validate_not_gpu_instance_type(training_instance_type):
+    """Reject training instance types that are known GPU instances.
+
+    Args:
+        training_instance_type (str): Requested training instance type, e.g. 'ml.m4.xlarge'.
+            ``None`` (no instance type supplied) passes validation.
+
+    Raises:
+        ValueError: If ``training_instance_type`` is one of the listed P2/P3 GPU instance types.
+    """
+    # 'ml.p3.2xlarge' is the smallest P3 size and was missing from the list, so it slipped through.
+    # 'ml.p3.xlarge' is kept only so that every value rejected before is still rejected.
+    gpu_instance_types = ('ml.p2.xlarge', 'ml.p2.8xlarge', 'ml.p2.16xlarge',
+                          'ml.p3.2xlarge', 'ml.p3.xlarge', 'ml.p3.8xlarge', 'ml.p3.16xlarge')
+
+    if training_instance_type in gpu_instance_types:
+        raise ValueError("GPU training is not supported for SciKit-Learn. "
+                         "Please pick a different instance type from here: "
+                         "https://aws.amazon.com/ec2/instance-types/")
@@ -0,0 +1,96 @@
+# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import sagemaker
+from sagemaker.fw_utils import create_image_uri, model_code_key_prefix
+from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME
+from sagemaker.predictor import RealTimePredictor, npy_serializer, numpy_deserializer
+from sagemaker.sklearn.defaults import SKLEARN_VERSION
+
+
+class SKLearnPredictor(RealTimePredictor):
+    """A ``RealTimePredictor`` for inference against Scikit-learn Endpoints.
+
+    Requests are serialized with ``npy_serializer`` and responses are decoded with ``numpy_deserializer``."""
+
+    def __init__(self, endpoint_name, sagemaker_session=None):
+        """Initialize an ``SKLearnPredictor``.
+
+        Args:
+            endpoint_name (str): The name of the endpoint to perform inference on.
+            sagemaker_session (sagemaker.session.Session): Session object which manages interactions with
+                Amazon SageMaker APIs and any other AWS services needed (default: None). It is passed
+                through unchanged to ``RealTimePredictor``.
+        """
+        base_args = (endpoint_name, sagemaker_session, npy_serializer, numpy_deserializer)
+        super(SKLearnPredictor, self).__init__(*base_args)
+
+
+class SKLearnModel(FrameworkModel):
+    """A Scikit-learn SageMaker ``Model`` that can be deployed to a SageMaker ``Endpoint``."""
+
+    __framework_name__ = 'scikit-learn'
+
+    def __init__(self, model_data, role, entry_point, image=None, py_version='py3', framework_version=SKLEARN_VERSION,
+                 predictor_cls=SKLearnPredictor, model_server_workers=None, **kwargs):
+        """Initialize an SKLearnModel.
+
+        Args:
+            model_data (str): The S3 location of a SageMaker model data ``.tar.gz`` file.
+            role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training jobs and APIs
+                that create Amazon SageMaker endpoints use this role to access training data and model artifacts.
+                After the endpoint is created, the inference code might use the IAM role,
+                if it needs to access an AWS resource.
+            entry_point (str): Path (absolute or relative) to the Python source file which should be executed
+                as the entry point to model hosting. This should be compatible with either Python 2.7 or Python 3.5.
+            image (str): A Docker image URI (default: None). If not specified, a default image for Scikit-learn
+                will be used.
+            py_version (str): Python version you want to use for executing your model code (default: 'py3').
+            framework_version (str): Scikit-learn version you want to use for executing your model code
+                (default: ``SKLEARN_VERSION``).
+            predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor
+                with an endpoint name and SageMaker ``Session``. If specified, ``deploy()`` returns the result of
+                invoking this function on the created endpoint name.
+            model_server_workers (int): Optional. The number of worker processes used by the inference server.
+                If None, server will use one worker per vCPU.
+            **kwargs: Keyword arguments passed to the ``FrameworkModel`` initializer.
+        """
+        super(SKLearnModel, self).__init__(
+            model_data, image, role, entry_point, predictor_cls=predictor_cls, **kwargs)
+
+        self.model_server_workers = model_server_workers
+        self.framework_version = framework_version
+        self.py_version = py_version
+
+    def prepare_container_def(self, instance_type):
+        """Return a container definition with framework configuration set in model environment variables.
+
+        Uploads the model code as a side effect (via ``_upload_code``) before building the definition.
+
+        Args:
+            instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
+
+        Returns:
+            dict[str, str]: A container definition object usable with the CreateModel API.
+        """
+        # Fall back to the framework's default image only when no explicit image was configured.
+        image_uri = self.image
+        if not image_uri:
+            image_uri = create_image_uri(self.sagemaker_session.boto_session.region_name,
+                                         self.__framework_name__, instance_type,
+                                         self.framework_version, self.py_version)
+
+        self._upload_code(model_code_key_prefix(self.key_prefix, self.name, image_uri))
+
+        # Copy the user environment so the framework variables do not mutate ``self.env``.
+        container_env = dict(self.env)
+        container_env.update(self._framework_env_vars())
+        if self.model_server_workers:
+            container_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)
+
+        return sagemaker.container_def(image_uri, self.model_data, container_env)