change: break out methods to get processing arguments (#1851)

metrizable · web-flow · commit 7a5e11f51524 · 2020-08-25T10:11:20.000-07:00
diff --git a/src/sagemaker/processing.py b/src/sagemaker/processing.py
@@ -570,11 +570,48 @@ def start_new(cls, processor, inputs, outputs, experiment_config):
             :class:`~sagemaker.processing.ProcessingJob`: The instance of ``ProcessingJob`` created
                 using the ``Processor``.
         """
+        process_args = cls._get_process_args(processor, inputs, outputs, experiment_config)
+
+        # Print the job name and the user's inputs and outputs as lists of dictionaries.
+        print()
+        print("Job Name: ", process_args["job_name"])
+        print("Inputs: ", process_args["inputs"])
+        print("Outputs: ", process_args["output_config"]["Outputs"])
+
+        # Call sagemaker_session.process using the arguments dictionary.
+        processor.sagemaker_session.process(**process_args)
+
+        return cls(
+            processor.sagemaker_session,
+            processor._current_job_name,
+            inputs,
+            outputs,
+            processor.output_kms_key,
+        )
+
+    @classmethod
+    def _get_process_args(cls, processor, inputs, outputs, experiment_config):
+        """Gets a dict of arguments for a new Amazon SageMaker processing job from the processor.
+
+        Args:
+            processor (:class:`~sagemaker.processing.Processor`): The ``Processor`` instance
+                that started the job.
+            inputs (list[:class:`~sagemaker.processing.ProcessingInput`]): A list of
+                :class:`~sagemaker.processing.ProcessingInput` objects.
+            outputs (list[:class:`~sagemaker.processing.ProcessingOutput`]): A list of
+                :class:`~sagemaker.processing.ProcessingOutput` objects.
+            experiment_config (dict[str, str]): Experiment management configuration.
+                Dictionary contains three optional keys:
+                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+
+        Returns:
+            Dict: keyword arguments for the ``sagemaker.session.Session.process`` method
+        """
         # Initialize an empty dictionary for arguments to be passed to sagemaker_session.process.
         process_request_args = {}
 
         # Add arguments to the dictionary.
-        process_request_args["inputs"] = [input._to_request_dict() for input in inputs]
+        process_request_args["inputs"] = [inp._to_request_dict() for inp in inputs]
 
         process_request_args["output_config"] = {
             "Outputs": [output._to_request_dict() for output in outputs]
@@ -622,22 +659,7 @@ def start_new(cls, processor, inputs, outputs, experiment_config):
 
         process_request_args["tags"] = processor.tags
 
-        # Print the job name and the user's inputs and outputs as lists of dictionaries.
-        print()
-        print("Job Name: ", process_request_args["job_name"])
-        print("Inputs: ", process_request_args["inputs"])
-        print("Outputs: ", process_request_args["output_config"]["Outputs"])
-
-        # Call sagemaker_session.process using the arguments dictionary.
-        processor.sagemaker_session.process(**process_request_args)
-
-        return cls(
-            processor.sagemaker_session,
-            processor._current_job_name,
-            inputs,
-            outputs,
-            processor.output_kms_key,
-        )
+        return process_request_args
 
     @classmethod
     def from_processing_name(cls, sagemaker_session, processing_job_name):
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -636,7 +636,7 @@ def _get_train_request(  # noqa: C901
                 (default: ``None``).
 
         Returns:
-            Dict: a training request dictionary
+            Dict: a training request dict
         """
         train_request = {
             "AlgorithmSpecification": {"TrainingInputMode": input_mode},
@@ -756,6 +756,71 @@ def process(
                 three optional keys, 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
                 (default: ``None``)
         """
+        process_request = self._get_process_request(
+            inputs=inputs,
+            output_config=output_config,
+            job_name=job_name,
+            resources=resources,
+            stopping_condition=stopping_condition,
+            app_specification=app_specification,
+            environment=environment,
+            network_config=network_config,
+            role_arn=role_arn,
+            tags=tags,
+            experiment_config=experiment_config,
+        )
+        LOGGER.info("Creating processing-job with name %s", job_name)
+        LOGGER.debug("process request: %s", json.dumps(process_request, indent=4))
+        self.sagemaker_client.create_processing_job(**process_request)
+
+    def _get_process_request(
+        self,
+        inputs,
+        output_config,
+        job_name,
+        resources,
+        stopping_condition,
+        app_specification,
+        environment,
+        network_config,
+        role_arn,
+        tags,
+        experiment_config=None,
+    ):
+        """Constructs a request compatible with an Amazon SageMaker processing job.
+
+        Args:
+            inputs ([dict]): List of up to 10 ProcessingInput dictionaries.
+            output_config (dict): A config dictionary, which contains a list of up
+                to 10 ProcessingOutput dictionaries, as well as an optional KMS key ID.
+            job_name (str): The name of the processing job. The name must be unique
+                within an AWS Region in an AWS account. Names should have minimum
+                length of 1 and maximum length of 63 characters.
+            resources (dict): Encapsulates the resources, including ML instances
+                and storage, to use for the processing job.
+            stopping_condition (dict[str,int]): Specifies a limit to how long
+                the processing job can run, in seconds.
+            app_specification (dict[str,str]): Configures the processing job to
+                run the given image. Details are in the processing container
+                specification.
+            environment (dict): Environment variables to start the processing
+                container with.
+            network_config (dict): Specifies networking options, such as network
+                traffic encryption between processing containers, whether to allow
+                inbound and outbound network calls to and from processing containers,
+                and VPC subnets and security groups to use for VPC-enabled processing
+                jobs.
+            role_arn (str): The Amazon Resource Name (ARN) of an IAM role that
+                Amazon SageMaker can assume to perform tasks on your behalf.
+            tags ([dict[str,str]]): A list of dictionaries containing key-value
+                pairs.
+            experiment_config (dict): Experiment management configuration. Dictionary contains
+                three optional keys, 'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                (default: ``None``)
+
+        Returns:
+            Dict: a processing job request dict
+        """
         process_request = {
             "ProcessingJobName": job_name,
             "ProcessingResources": resources,
@@ -784,9 +849,7 @@ def process(
         if experiment_config:
             process_request["ExperimentConfig"] = experiment_config
 
-        LOGGER.info("Creating processing-job with name %s", job_name)
-        LOGGER.debug("process request: %s", json.dumps(process_request, indent=4))
-        self.sagemaker_client.create_processing_job(**process_request)
+        return process_request
 
     def create_monitoring_schedule(
         self,