aws
diff --git a/‎src/sagemaker/algorithm.py‎
Lines changed: 66 additions & 45 deletions b/‎src/sagemaker/algorithm.py‎
Lines changed: 66 additions & 45 deletions
diff --git a/‎src/sagemaker/chainer/estimator.py‎
Lines changed: 20 additions & 20 deletions b/‎src/sagemaker/chainer/estimator.py‎
Lines changed: 20 additions & 20 deletions
diff --git a/‎src/sagemaker/pytorch/estimator.py‎
Lines changed: 9 additions & 9 deletions b/‎src/sagemaker/pytorch/estimator.py‎
Lines changed: 9 additions & 9 deletions
@@ -13,15 +13,22 @@
 """Test docstring"""
 from __future__ import absolute_import
 
+from typing import Optional, Union, Dict, List
+
 import sagemaker
 import sagemaker.parameter
 from sagemaker import vpc_utils
 from sagemaker.deserializers import BytesDeserializer
 from sagemaker.deprecations import removed_kwargs
 from sagemaker.estimator import EstimatorBase
+from sagemaker.inputs import TrainingInput, FileSystemInput
 from sagemaker.serializers import IdentitySerializer
 from sagemaker.transformer import Transformer
 from sagemaker.predictor import Predictor
+from sagemaker.session import Session
+from sagemaker.workflow.entities import PipelineVariable
+
+from sagemaker.workflow import is_pipeline_variable
 
 
 class AlgorithmEstimator(EstimatorBase):
@@ -37,28 +44,28 @@ class AlgorithmEstimator(EstimatorBase):
 
     def __init__(
         self,
-        algorithm_arn,
-        role,
-        instance_count,
-        instance_type,
-        volume_size=30,
-        volume_kms_key=None,
-        max_run=24 * 60 * 60,
-        input_mode="File",
-        output_path=None,
-        output_kms_key=None,
-        base_job_name=None,
-        sagemaker_session=None,
-        hyperparameters=None,
-        tags=None,
-        subnets=None,
-        security_group_ids=None,
-        model_uri=None,
-        model_channel_name="model",
-        metric_definitions=None,
-        encrypt_inter_container_traffic=False,
-        use_spot_instances=False,
-        max_wait=None,
+        algorithm_arn: str,
+        role: str,
+        instance_count: Optional[Union[int, PipelineVariable]] = None,
+        instance_type: Optional[Union[str, PipelineVariable]] = None,
+        volume_size: Union[int, PipelineVariable] = 30,
+        volume_kms_key: Optional[Union[str, PipelineVariable]] = None,
+        max_run: Union[int, PipelineVariable] = 24 * 60 * 60,
+        input_mode: Union[str, PipelineVariable] = "File",
+        output_path: Optional[Union[str, PipelineVariable]] = None,
+        output_kms_key: Optional[Union[str, PipelineVariable]] = None,
+        base_job_name: Optional[str] = None,
+        sagemaker_session: Optional[Session] = None,
+        hyperparameters: Optional[Dict[str, Union[str, PipelineVariable]]] = None,
+        tags: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
+        subnets: Optional[List[Union[str, PipelineVariable]]] = None,
+        security_group_ids: Optional[List[Union[str, PipelineVariable]]] = None,
+        model_uri: Optional[str] = None,
+        model_channel_name: Union[str, PipelineVariable] = "model",
+        metric_definitions: Optional[List[Dict[str, Union[str, PipelineVariable]]]] = None,
+        encrypt_inter_container_traffic: Union[bool, PipelineVariable] = False,
+        use_spot_instances: Union[bool, PipelineVariable] = False,
+        max_wait: Optional[Union[int, PipelineVariable]] = None,
         **kwargs  # pylint: disable=W0613
     ):
         """Initialize an ``AlgorithmEstimator`` instance.
@@ -71,18 +78,21 @@ def __init__(
                 access training data and model artifacts. After the endpoint
                 is created, the inference code might use the IAM role, if it
                 needs to access an AWS resource.
-            instance_count (int): Number of Amazon EC2 instances to use for training.
-            instance_type (str): Type of EC2 instance to use for training, for example, 'ml.c4.xlarge'.
-            volume_size (int): Size in GB of the EBS volume to use for
+            instance_count (int or PipelineVariable): Number of Amazon EC2 instances to use
+                for training.
+            instance_type (str or PipelineVariable): Type of EC2 instance to use for training,
+                for example, 'ml.c4.xlarge'.
+            volume_size (int or PipelineVariable): Size in GB of the EBS volume to use for
                 storing input data during training (default: 30). Must be large enough to store
                 training data if File Mode is used (which is the default).
-            volume_kms_key (str): Optional. KMS key ID for encrypting EBS volume attached
-                to the training instance (default: None).
-            max_run (int): Timeout in seconds for training (default: 24 * 60 * 60).
+            volume_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting
+                EBS volume attached to the training instance (default: None).
+            max_run (int or PipelineVariable): Timeout in seconds for training
+                (default: 24 * 60 * 60).
                 After this amount of time Amazon SageMaker terminates the
                 job regardless of its current status.
-            input_mode (str): The input mode that the algorithm supports
-            (default: 'File'). Valid modes:
+            input_mode (str or PipelineVariable): The input mode that the algorithm supports
+                (default: 'File'). Valid modes:
 
                 * 'File' - Amazon SageMaker copies the training dataset from
                   the S3 location to a local directory.
@@ -92,13 +102,14 @@ def __init__(
                 This argument can be overriden on a per-channel basis using
                 ``sagemaker.inputs.TrainingInput.input_mode``.
 
-            output_path (str): S3 location for saving the training result (model artifacts and
-                output files). If not specified, results are stored to a default bucket. If
+            output_path (str or PipelineVariable): S3 location for saving the training result
+                (model artifacts and output files). If not specified,
+                results are stored to a default bucket. If
                 the bucket with the specific name does not exist, the
                 estimator creates the bucket during the
                 :meth:`~sagemaker.estimator.EstimatorBase.fit` method
                 execution.
-            output_kms_key (str): Optional. KMS key ID for encrypting the
+            output_kms_key (str or PipelineVariable): Optional. KMS key ID for encrypting the
                 training output (default: None). base_job_name (str): Prefix for
                 training job name when the
                 :meth:`~sagemaker.estimator.EstimatorBase.fit`
@@ -109,9 +120,10 @@ def __init__(
                 interactions with Amazon SageMaker APIs and any other AWS services needed. If
                 not specified, the estimator creates one using the default
                 AWS configuration chain.
-            tags (list[dict]): List of tags for labeling a training job. For more, see
+            tags (list[dict[str, str] or list[dict[str, PipelineVariable]]): List of tags for
+                labeling a training job. For more, see
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.
-            subnets (list[str]): List of subnet ids. If not specified
+            subnets (list[str] or list[PipelineVariable]): List of subnet ids. If not specified
                 training job will be created without VPC config.
                 security_group_ids (list[str]): List of security group ids. If
                 not specified training job will be created without VPC config.
@@ -122,22 +134,22 @@ def __init__(
                 other artifacts coming from a different source.
                 More information:
                 https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html#td-deserialization
-            model_channel_name (str): Name of the channel where 'model_uri'
+            model_channel_name (str or PipelineVariable): Name of the channel where 'model_uri'
                 will be downloaded (default: 'model'). metric_definitions
                 (list[dict]): A list of dictionaries that defines the metric(s)
                 used to evaluate the training jobs. Each dictionary contains two keys: 'Name' for
                 the name of the metric, and 'Regex' for the regular
                 expression used to extract the metric from the logs.
-            encrypt_inter_container_traffic (bool): Specifies whether traffic between training
-                containers is encrypted for the training job (default: ``False``).
-            use_spot_instances (bool): Specifies whether to use SageMaker
+            encrypt_inter_container_traffic (bool or PipelineVariable): Specifies whether traffic
+                between training containers is encrypted for the training job (default: ``False``).
+            use_spot_instances (bool or PipelineVariable): Specifies whether to use SageMaker
                 Managed Spot instances for training. If enabled then the
                 `max_wait` arg should also be set.
 
                 More information:
                 https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html
                 (default: ``False``).
-            max_wait (int): Timeout in seconds waiting for spot training
+            max_wait (int or PipelineVariable): Timeout in seconds waiting for spot training
                 instances (default: None). After this amount of time Amazon
                 SageMaker will stop waiting for Spot instances to become
                 available (default: ``None``).
@@ -186,22 +198,25 @@ def validate_train_spec(self):
         # Check that the input mode provided is compatible with the training input modes for the
         # algorithm.
         input_modes = self._algorithm_training_input_modes(train_spec["TrainingChannels"])
-        if self.input_mode not in input_modes:
+        if not is_pipeline_variable(self.input_mode) and self.input_mode not in input_modes:
             raise ValueError(
                 "Invalid input mode: %s. %s only supports: %s"
                 % (self.input_mode, algorithm_name, input_modes)
             )
 
         # Check that the training instance type is compatible with the algorithm.
         supported_instances = train_spec["SupportedTrainingInstanceTypes"]
-        if self.instance_type not in supported_instances:
+        if (
+            not is_pipeline_variable(self.instance_type)
+            and self.instance_type not in supported_instances
+        ):
             raise ValueError(
                 "Invalid instance_type: %s. %s supports the following instance types: %s"
                 % (self.instance_type, algorithm_name, supported_instances)
             )
 
         # Verify if distributed training is supported by the algorithm
-        if (
+        if not is_pipeline_variable(self.instance_count) and (
             self.instance_count > 1
             and "SupportsDistributedTraining" in train_spec
             and not train_spec["SupportsDistributedTraining"]
@@ -414,12 +429,18 @@ def _prepare_for_training(self, job_name=None):
 
         super(AlgorithmEstimator, self)._prepare_for_training(job_name)
 
-    def fit(self, inputs=None, wait=True, logs=True, job_name=None):
+    def fit(
+        self,
+        inputs: Optional[Union[str, Dict, TrainingInput, FileSystemInput]] = None,
+        wait: bool = True,
+        logs: bool = True,
+        job_name: Optional[str] = None,
+    ):
         """Placeholder docstring"""
         if inputs:
             self._validate_input_channels(inputs)
 
-        super(AlgorithmEstimator, self).fit(inputs, wait, logs, job_name)
+        return super(AlgorithmEstimator, self).fit(inputs, wait, logs, job_name)
 
     def _validate_input_channels(self, channels):
         """Placeholder docstring"""
 
@@ -14,7 +14,7 @@
 from __future__ import absolute_import
 
 import logging
-from typing import Union, Optional
+from typing import Union, Optional, Dict
 
 from sagemaker.estimator import Framework, EstimatorBase
 from sagemaker.fw_utils import (
@@ -34,26 +34,26 @@
 class Chainer(Framework):
     """Handle end-to-end training and deployment of custom Chainer code."""
 
-    _framework_name = "chainer"
+    _framework_name: str = "chainer"
 
     # Hyperparameters
-    _use_mpi = "sagemaker_use_mpi"
-    _num_processes = "sagemaker_num_processes"
-    _process_slots_per_host = "sagemaker_process_slots_per_host"
-    _additional_mpi_options = "sagemaker_additional_mpi_options"
+    _use_mpi: str = "sagemaker_use_mpi"
+    _num_processes: str = "sagemaker_num_processes"
+    _process_slots_per_host: str = "sagemaker_process_slots_per_host"
+    _additional_mpi_options: str = "sagemaker_additional_mpi_options"
 
     def __init__(
         self,
         entry_point: Union[str, PipelineVariable],
-        use_mpi=None,
-        num_processes=None,
-        process_slots_per_host=None,
-        additional_mpi_options=None,
+        use_mpi: Optional[Union[bool, PipelineVariable]] = None,
+        num_processes: Optional[Union[int, PipelineVariable]] = None,
+        process_slots_per_host: Optional[Union[int, PipelineVariable]] = None,
+        additional_mpi_options: Optional[Union[str, PipelineVariable]] = None,
         source_dir: Optional[Union[str, PipelineVariable]] = None,
-        hyperparameters=None,
-        framework_version=None,
-        py_version=None,
-        image_uri=None,
+        hyperparameters: Optional[Dict[str, Union[str, PipelineVariable]]] = None,
+        framework_version: Optional[str] = None,
+        py_version: Optional[str] = None,
+        image_uri: Optional[Union[str, PipelineVariable]] = None,
         **kwargs
     ):
         """This ``Estimator`` executes an Chainer script in a managed execution environment.
@@ -78,26 +78,26 @@ def __init__(
                 file which should be executed as the entry point to training.
                 If ``source_dir`` is specified, then ``entry_point``
                 must point to a file located at the root of ``source_dir``.
-            use_mpi (bool): If true, entry point is run as an MPI script. By
+            use_mpi (bool or PipelineVariable): If true, entry point is run as an MPI script. By
                 default, the Chainer Framework runs the entry point with
                 'mpirun' if more than one instance is used.
-            num_processes (int): Total number of processes to run the entry
+            num_processes (int or PipelineVariable): Total number of processes to run the entry
                 point with. By default, the Chainer Framework runs one process
                 per GPU (on GPU instances), or one process per host (on CPU
                 instances).
-            process_slots_per_host (int): The number of processes that can run
+            process_slots_per_host (int or PipelineVariable): The number of processes that can run
                 on each instance. By default, this is set to the number of GPUs
                 on the instance (on GPU instances), or one (on CPU instances).
-            additional_mpi_options (str): String of options to the 'mpirun'
+            additional_mpi_options (str or PipelineVariable): String of options to the 'mpirun'
                 command used to run the entry point. For example, '-X
                 NCCL_DEBUG=WARN' will pass that option string to the mpirun
                 command.
             source_dir (str or PipelineVariable): Path (absolute or relative) to a directory with
                 any other training source code dependencies aside from the entry
                 point file (default: None). Structure within this directory are
                 preserved when training on Amazon SageMaker.
-            hyperparameters (dict): Hyperparameters that will be used for
-                training (default: None). The hyperparameters are made
+            hyperparameters (dict[str, str] or dict[str, PipelineVariable]): Hyperparameters
+                that will be used for training (default: None). The hyperparameters are made
                 accessible as a dict[str, str] to the training code on
                 SageMaker. For convenience, this accepts other types for keys
                 and values, but ``str()`` will be called to convert them before
 
@@ -14,7 +14,7 @@
 from __future__ import absolute_import
 
 import logging
-from typing import Union, Optional
+from typing import Union, Optional, Dict
 
 from packaging.version import Version
 
@@ -44,12 +44,12 @@ class PyTorch(Framework):
     def __init__(
         self,
         entry_point: Union[str, PipelineVariable],
-        framework_version=None,
-        py_version=None,
+        framework_version: Optional[str] = None,
+        py_version: Optional[str] = None,
         source_dir: Optional[Union[str, PipelineVariable]] = None,
-        hyperparameters=None,
-        image_uri=None,
-        distribution=None,
+        hyperparameters: Optional[Dict[str, Union[str, PipelineVariable]]] = None,
+        image_uri: Optional[Union[str, PipelineVariable]] = None,
+        distribution: Optional[Dict] = None,
         **kwargs
     ):
         """This ``Estimator`` executes a PyTorch script in a managed PyTorch execution environment.
@@ -86,13 +86,13 @@ def __init__(
                 point file (default: None). If ``source_dir`` is an S3 URI, it must
                 point to a tar.gz file. Structure within this directory are preserved
                 when training on Amazon SageMaker.
-            hyperparameters (dict): Hyperparameters that will be used for
-                training (default: None). The hyperparameters are made
+            hyperparameters (dict[str, str] or dict[str, PipelineVariable]): Hyperparameters
+                that will be used for training (default: None). The hyperparameters are made
                 accessible as a dict[str, str] to the training code on
                 SageMaker. For convenience, this accepts other types for keys
                 and values, but ``str()`` will be called to convert them before
                 training.
-            image_uri (str): If specified, the estimator will use this image
+            image_uri (str or PipelineVariable): If specified, the estimator will use this image
                 for training and hosting, instead of selecting the appropriate
                 SageMaker official image based on framework_version and
                 py_version. It can be an ECR url or dockerhub image and tag.