|
37 | 37 | from sagemaker.workflow.pipeline_experiment_config import PipelineExperimentConfig |
38 | 38 | from sagemaker.workflow.parallelism_config import ParallelismConfiguration |
39 | 39 | from sagemaker.workflow.properties import Properties |
40 | | -from sagemaker.workflow.steps import Step |
| 40 | +from sagemaker.workflow.steps import Step, StepTypeEnum |
41 | 41 | from sagemaker.workflow.step_collections import StepCollection |
42 | 42 | from sagemaker.workflow.condition_step import ConditionStep |
43 | 43 | from sagemaker.workflow.utilities import list_to_request |
44 | 44 |
|
| 45 | +_DEFAULT_EXPERIMENT_CFG = PipelineExperimentConfig( |
| 46 | + ExecutionVariables.PIPELINE_NAME, ExecutionVariables.PIPELINE_EXECUTION_ID |
| 47 | +) |
| 48 | + |
45 | 49 |
|
46 | | -@attr.s |
47 | 50 | class Pipeline(Entity): |
48 | | - """Pipeline for workflow. |
| 51 | + """Pipeline for workflow.""" |
49 | 52 |
|
50 | | - Attributes: |
51 | | - name (str): The name of the pipeline. |
52 | | - parameters (Sequence[Parameter]): The list of the parameters. |
53 | | - pipeline_experiment_config (Optional[PipelineExperimentConfig]): If set, |
54 | | - the workflow will attempt to create an experiment and trial before |
55 | | - executing the steps. Creation will be skipped if an experiment or a trial with |
56 | | - the same name already exists. By default, pipeline name is used as |
57 | | - experiment name and execution id is used as the trial name. |
58 | | - If set to None, no experiment or trial will be created automatically. |
59 | | - steps (Sequence[Union[Step, StepCollection]]): The list of the non-conditional steps |
60 | | - associated with the pipeline. Any steps that are within the |
61 | | - `if_steps` or `else_steps` of a `ConditionStep` cannot be listed in the steps of a |
62 | | - pipeline. Of particular note, the workflow service rejects any pipeline definitions that |
63 | | - specify a step in the list of steps of a pipeline and that step in the `if_steps` or |
64 | | - `else_steps` of any `ConditionStep`. |
65 | | - sagemaker_session (sagemaker.session.Session): Session object that manages interactions |
66 | | - with Amazon SageMaker APIs and any other AWS services needed. If not specified, the |
67 | | - pipeline creates one using the default AWS configuration chain. |
68 | | - """ |
| 53 | + def __init__( |
| 54 | + self, |
| 55 | + name: str = "", |
| 56 | + parameters: Optional[Sequence[Parameter]] = None, |
| 57 | + pipeline_experiment_config: Optional[PipelineExperimentConfig] = _DEFAULT_EXPERIMENT_CFG, |
| 58 | + steps: Optional[Sequence[Union[Step, StepCollection]]] = None, |
| 59 | + sagemaker_session: Optional[Session] = None, |
| 60 | + ): |
| 61 | + """Initialize a Pipeline |
69 | 62 |
|
70 | | - name: str = attr.ib(factory=str) |
71 | | - parameters: Sequence[Parameter] = attr.ib(factory=list) |
72 | | - pipeline_experiment_config: Optional[PipelineExperimentConfig] = attr.ib( |
73 | | - default=PipelineExperimentConfig( |
74 | | - ExecutionVariables.PIPELINE_NAME, ExecutionVariables.PIPELINE_EXECUTION_ID |
75 | | - ) |
76 | | - ) |
77 | | - steps: Sequence[Union[Step, StepCollection]] = attr.ib(factory=list) |
78 | | - sagemaker_session: Session = attr.ib(factory=Session) |
| 63 | + Args: |
| 64 | + name (str): The name of the pipeline. |
| 65 | + parameters (Sequence[Parameter]): The list of the parameters. |
| 66 | + pipeline_experiment_config (Optional[PipelineExperimentConfig]): If set, |
| 67 | + the workflow will attempt to create an experiment and trial before |
| 68 | + executing the steps. Creation will be skipped if an experiment or a trial with |
| 69 | + the same name already exists. By default, pipeline name is used as |
| 70 | + experiment name and execution id is used as the trial name. |
| 71 | + If set to None, no experiment or trial will be created automatically. |
| 72 | + steps (Sequence[Union[Step, StepCollection]]): The list of the non-conditional steps |
| 73 | + associated with the pipeline. Any steps that are within the |
| 74 | + `if_steps` or `else_steps` of a `ConditionStep` cannot be listed in the steps of a |
| 75 | + pipeline. Of particular note, the workflow service rejects any pipeline definitions |
| 76 | + that specify a step in the list of steps of a pipeline and that step in the |
| 77 | + `if_steps` or `else_steps` of any `ConditionStep`. |
| 78 | + sagemaker_session (sagemaker.session.Session): Session object that manages interactions |
| 79 | + with Amazon SageMaker APIs and any other AWS services needed. If not specified, the |
| 80 | + pipeline creates one using the default AWS configuration chain. |
| 81 | + """ |
| 82 | + self.name = name |
| 83 | + self.parameters = parameters if parameters else [] |
| 84 | + self.pipeline_experiment_config = pipeline_experiment_config |
| 85 | + self.steps = steps if steps else [] |
| 86 | + self.sagemaker_session = sagemaker_session if sagemaker_session else Session() |
79 | 87 |
|
80 | | - _version: str = "2020-12-01" |
81 | | - _metadata: Dict[str, Any] = dict() |
| 88 | + self._version = "2020-12-01" |
| 89 | + self._metadata = dict() |
| 90 | + self._step_map = dict() |
| 91 | + _generate_step_map(self.steps, self._step_map) |
82 | 92 |
|
83 | 93 | def to_request(self) -> RequestType: |
84 | 94 | """Gets the request structure for workflow service calls.""" |
@@ -193,6 +203,8 @@ def update( |
193 | 203 | Returns: |
194 | 204 | A response dict from the service. |
195 | 205 | """ |
| 206 | + self._step_map = dict() |
| 207 | + _generate_step_map(self.steps, self._step_map) |
196 | 208 | kwargs = self._create_args(role_arn, description, parallelism_config) |
197 | 209 | return self.sagemaker_session.sagemaker_client.update_pipeline(**kwargs) |
198 | 210 |
|
@@ -305,23 +317,27 @@ def definition(self) -> str: |
305 | 317 |
|
306 | 318 | return json.dumps(request_dict) |
307 | 319 |
|
308 | | - def _interpolate_step_collection_name_in_depends_on(self, step_requests: dict): |
| 320 | + def _interpolate_step_collection_name_in_depends_on(self, step_requests: list): |
309 | 321 | """Insert step names as per `StepCollection` name in depends_on list |
310 | 322 |
|
311 | 323 | Args: |
312 | | - step_requests (dict): The raw step request dict without any interpolation. |
| 324 | + step_requests (list): The list of raw step request dicts without any interpolation. |
313 | 325 | """ |
314 | | - step_name_map = {s.name: s for s in self.steps} |
315 | 326 | for step_request in step_requests: |
316 | | - if not step_request.get("DependsOn", None): |
317 | | - continue |
318 | 327 | depends_on = [] |
319 | | - for depend_step_name in step_request["DependsOn"]: |
320 | | - if isinstance(step_name_map[depend_step_name], StepCollection): |
321 | | - depends_on.extend([s.name for s in step_name_map[depend_step_name].steps]) |
| 328 | + for depend_step_name in step_request.get("DependsOn", []): |
| 329 | + if isinstance(self._step_map[depend_step_name], StepCollection): |
| 330 | + depends_on.extend([s.name for s in self._step_map[depend_step_name].steps]) |
322 | 331 | else: |
323 | 332 | depends_on.append(depend_step_name) |
324 | | - step_request["DependsOn"] = depends_on |
| 333 | + if depends_on: |
| 334 | + step_request["DependsOn"] = depends_on |
| 335 | + |
| 336 | + if step_request["Type"] == StepTypeEnum.CONDITION.value: |
| 337 | + sub_step_requests = ( |
| 338 | + step_request["Arguments"]["IfSteps"] + step_request["Arguments"]["ElseSteps"] |
| 339 | + ) |
| 340 | + self._interpolate_step_collection_name_in_depends_on(sub_step_requests) |
325 | 341 |
|
326 | 342 |
|
327 | 343 | def format_start_parameters(parameters: Dict[str, Any]) -> List[Dict[str, Any]]: |
@@ -448,6 +464,20 @@ def update_args(args: Dict[str, Any], **kwargs): |
448 | 464 | args.update({key: value}) |
449 | 465 |
|
450 | 466 |
|
| 467 | +def _generate_step_map( |
| 468 | + steps: Sequence[Union[Step, StepCollection]], step_map: dict |
| 469 | +) -> Dict[str, Any]: |
| 470 | + """Helper method to create a mapping from Step/Step Collection name to itself.""" |
| 471 | + for step in steps: |
| 472 | + if step.name in step_map: |
| 473 | + raise ValueError("Pipeline steps cannot have duplicate names.") |
| 474 | + step_map[step.name] = step |
| 475 | + if isinstance(step, ConditionStep): |
| 476 | + _generate_step_map(step.if_steps + step.else_steps, step_map) |
| 477 | + if isinstance(step, StepCollection): |
| 478 | + _generate_step_map(step.steps, step_map) |
| 479 | + |
| 480 | + |
451 | 481 | @attr.s |
452 | 482 | class _PipelineExecution: |
453 | 483 | """Internal class for encapsulating pipeline execution instances. |
@@ -547,22 +577,11 @@ class PipelineGraph: |
547 | 577 |
|
548 | 578 | def __init__(self, steps: Sequence[Union[Step, StepCollection]]): |
549 | 579 | self.step_map = {} |
550 | | - self._generate_step_map(steps) |
| 580 | + _generate_step_map(steps, self.step_map) |
551 | 581 | self.adjacency_list = self._initialize_adjacency_list() |
552 | 582 | if self.is_cyclic(): |
553 | 583 | raise ValueError("Cycle detected in pipeline step graph.") |
554 | 584 |
|
555 | | - def _generate_step_map(self, steps: Sequence[Union[Step, StepCollection]]): |
556 | | - """Helper method to create a mapping from Step/Step Collection name to itself.""" |
557 | | - for step in steps: |
558 | | - if step.name in self.step_map: |
559 | | - raise ValueError("Pipeline steps cannot have duplicate names.") |
560 | | - self.step_map[step.name] = step |
561 | | - if isinstance(step, ConditionStep): |
562 | | - self._generate_step_map(step.if_steps + step.else_steps) |
563 | | - if isinstance(step, StepCollection): |
564 | | - self._generate_step_map(step.steps) |
565 | | - |
566 | 585 | @classmethod |
567 | 586 | def from_pipeline(cls, pipeline: Pipeline): |
568 | 587 | """Create a PipelineGraph object from the Pipeline object.""" |
|
0 commit comments