Skip to content

Commit ae62200

Browse files
authored
feat: enable internal components in pipeline yaml (Azure#26800)
* feat: enable internal components in pipeline yaml
* refactor: add some gate logic
* fix: fix test_pipeline_job_create_with_registries
* fix: fix pylint
1 parent bba8ea7 commit ae62200

File tree

7 files changed

+40
-151
lines changed

7 files changed

+40
-151
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/node.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def make(self, data, **kwargs): # pylint: disable=unused-argument, no-self-use
4040
# dict to node object
4141
from azure.ai.ml.entities._job.pipeline._load_component import pipeline_node_factory
4242

43-
return pipeline_node_factory.load_from_dict(data) # pylint: disable=E1125, too-many-function-args
43+
return pipeline_node_factory.load_from_dict(data=data)
4444

4545
@pre_dump
4646
def resolve_inputs_outputs(self, job, **kwargs): # pylint: disable=unused-argument, no-self-use

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/command.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,9 +264,9 @@ def command(self, value: str) -> None:
264264
if isinstance(self.component, Component):
265265
self.component.command = value
266266
else:
267-
msg = "Can't set command property for a registered component {}"
267+
msg = "Can't set command property for a registered component {}. Tried to set it to {}."
268268
raise ValidationException(
269-
message=msg.format(self.component),
269+
message=msg.format(self.component, value),
270270
no_personal_data_message=msg,
271271
target=ErrorTarget.COMMAND_JOB,
272272
error_category=ErrorCategory.USER_ERROR,

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/pipeline/_load_component.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from azure.ai.ml.entities._builders.do_while import DoWhile
2222
from azure.ai.ml.entities._builders.pipeline import Pipeline
2323
from azure.ai.ml.entities._component.component import Component
24+
from azure.ai.ml.entities._component.component_factory import component_factory
2425
from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob
2526
from azure.ai.ml.entities._util import extract_label
2627
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException
@@ -179,7 +180,19 @@ def load_from_dict(self, *, data: dict, _type: str = None) -> Union[BaseNode, Au
179180
else:
180181
data[CommonYamlFields.TYPE] = _type
181182

182-
new_instance = self.get_create_instance_func(_type)()
183+
new_instance: Union[BaseNode, AutoMLJob] = self.get_create_instance_func(_type)()
184+
185+
if isinstance(new_instance, BaseNode):
186+
# parse component
187+
component_key = new_instance._get_component_attr_name()
188+
if component_key in data and isinstance(data[component_key], dict):
189+
data[component_key] = component_factory.load_from_dict(
190+
data=data[component_key],
191+
context={
192+
BASE_PATH_CONTEXT_KEY: data[component_key].get(BASE_PATH_CONTEXT_KEY, None),
193+
}
194+
)
195+
183196
new_instance.__init__(**data)
184197
return new_instance
185198

sdk/ml/azure-ai-ml/tests/internal/unittests/test_pipeline_job.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pytest
99
import yaml
1010

11-
from azure.ai.ml import Input, load_component
11+
from azure.ai.ml import Input, load_component, load_job
1212
from azure.ai.ml._internal import (
1313
AISuperComputerConfiguration,
1414
AISuperComputerScalePolicy,
@@ -592,3 +592,18 @@ def test_pipeline_with_setting_node_output_directly(self) -> None:
592592
copy_file.outputs.output_dir.path = "path_on_datastore"
593593
assert copy_file.outputs.output_dir.path == "path_on_datastore"
594594
assert copy_file.outputs.output_dir.type == "path"
595+
596+
def test_job_properties(self):
597+
pipeline_job: PipelineJob = load_job(
598+
source="./tests/test_configs/internal/pipeline_jobs/pipeline_job_with_properties.yml"
599+
)
600+
pipeline_dict = pipeline_job._to_dict()
601+
rest_pipeline_dict = pipeline_job._to_rest_object().as_dict()["properties"]
602+
assert pipeline_dict["properties"] == {"AZURE_ML_PathOnCompute_input_data": "/tmp/test"}
603+
assert rest_pipeline_dict["properties"] == pipeline_dict["properties"]
604+
for name, node_dict in pipeline_dict["jobs"].items():
605+
rest_node_dict = rest_pipeline_dict["jobs"][name]
606+
assert len(node_dict["properties"]) == 1
607+
assert "AZURE_ML_PathOnCompute_" in list(node_dict["properties"].keys())[0]
608+
assert node_dict["properties"] == rest_node_dict["properties"]
609+

sdk/ml/azure-ai-ml/tests/test_configs/internal/pipeline_jobs/pipeline_job_with_properties.yml

Lines changed: 7 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -26,139 +26,13 @@ properties:
2626
AZURE_ML_PathOnCompute_input_data: "/tmp/test"
2727

2828
jobs:
29-
node0: # inline command job with properties
30-
command: echo hello ${{inputs.hello_string}}
31-
environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest
32-
inputs:
33-
hello_string: ${{parent.inputs.hello_string}}
34-
properties:
35-
AZURE_ML_PathOnCompute_hello_string: "/tmp/test"
36-
37-
node1: # inline parallel job with properties
38-
type: parallel
39-
compute: "azureml:cpu-cluster"
40-
inputs:
41-
test1: ${{parent.inputs.input_data}}
42-
resources:
43-
instance_count: 3
44-
mini_batch_size: "100kb"
45-
mini_batch_error_threshold: 5
46-
logging_level: "DEBUG"
47-
input_data: ${{inputs.input_data}}
48-
max_concurrency_per_instance: 2
49-
task:
50-
type: run_function
51-
code: "../python"
52-
entry_script: pass_through.py
53-
append_row_to: ${{outputs.scored_result}} # optional, If Null, equals to summary_only mode in v1.
54-
environment: azureml:my-env:1
55-
properties:
56-
AZURE_ML_PathOnCompute_input_data: "/tmp/test"
57-
58-
node2: # inline import job with properties
59-
type: import
60-
source:
61-
type: azuresqldb
62-
query: >-
63-
select * from REGION
64-
connection: azureml:my_username_password
65-
output:
66-
type: mltable
67-
path: azureml://datastores/workspaceblobstore/paths/output_dir/
68-
properties:
69-
AZURE_ML_PathOnCompute_output: "/tmp/test"
70-
71-
node3: # inline spark job with properties
72-
type: spark
73-
inputs:
74-
test1: ${{parent.inputs.input_data}}
75-
file_input2: ${{parent.inputs.input_data}}
76-
code: ../dsl_pipeline/spark_job_in_pipeline/src
77-
entry:
78-
file: entry.py # file path of the entry file relative to the code root folder
79-
py_files:
80-
- utils.zip
81-
jars:
82-
- scalaproj.jar
83-
files:
84-
- my_files.txt
85-
args: >-
86-
--file_input1 ${{inputs.test1}}
87-
--file_input2 ${{inputs.file_input2}}
88-
--output ${{outputs.output}}
89-
compute: azureml:rezas-synapse-10
90-
conf:
91-
spark.driver.cores: 2
92-
spark.driver.memory: "1g"
93-
spark.executor.cores: 1
94-
spark.executor.memory: "1g"
95-
spark.executor.instances: 1
96-
properties:
97-
AZURE_ML_PathOnCompute_input_data: "/tmp/test"
98-
99-
node4: # inline automl job with properties
100-
type: automl
101-
task: text_ner
102-
log_verbosity: info
103-
primary_metric: accuracy
104-
limits:
105-
max_trials: 1
106-
timeout_minutes: 60
107-
training_data: ${{parent.inputs.text_ner_training_data}}
108-
validation_data: ${{parent.inputs.text_ner_validation_data}}
109-
properties:
110-
AZURE_ML_PathOnCompute_training_data: "/tmp/test"
111-
112-
node5: # inline sweep job with properties
113-
type: sweep
114-
search_space:
115-
component_in_number:
116-
type: choice
117-
values:
118-
- 25
119-
- 35
120-
limits:
121-
max_total_trials: 3
122-
sampling_algorithm: random
123-
objective:
124-
goal: maximize
125-
primary_metric: accuracy
126-
trial: azureml:microsoftsamplescommandcomponentbasic_nopaths_test:1
127-
properties:
128-
AZURE_ML_PathOnCompute_input: "/tmp/test"
129-
130-
node6: # parallel node with properties as a typical implement of base node.
131-
type: parallel
29+
node7: # internal command node with properties, as a typical implementation of an internal base node.
30+
type: CommandComponent
13231
compute: azureml:cpu-cluster
133-
component: ../components/parallel_component_with_file_input.yml
32+
component: file:../helloworld/helloworld_component_command.yml
13433
inputs:
135-
job_data_path: ${{parent.inputs.pipeline_job_data_path}}
136-
outputs:
137-
job_output_path:
138-
mini_batch_size: "1"
139-
mini_batch_error_threshold: 1
140-
max_concurrency_per_instance: 1
141-
properties:
142-
AZURE_ML_PathOnCompute_job_data_path: "/tmp/test"
143-
144-
# Comment these lines out as internal node is not well supported in yaml now.
145-
# node7: # internal command node with properties as a typical implement of internal base node.
146-
# type: CommandComponent
147-
# compute: azureml:cpu-cluster
148-
# component: ../internal/helloworld/helloworld_component_command.yml
149-
# inputs:
150-
# training_data: ${{parent.inputs.input_data}}
151-
# max_epochs: 10
152-
# learning_rate: 0.01
153-
# properties:
154-
# AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"
155-
156-
node8: # pipeline node with properties
157-
type: pipeline
158-
inputs:
159-
component_in_number: 11
160-
component_in_path: ${{parent.inputs.input_data}}
161-
162-
component: ../components/helloworld_pipeline_component.yml
34+
training_data: ${{parent.inputs.input_data}}
35+
max_epochs: 10
36+
learning_rate: 0.01
16337
properties:
164-
AZURE_ML_PathOnCompute_job_component_in_path: "/tmp/test"
38+
AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"

sdk/ml/azure-ai-ml/tests/test_configs/pipeline_jobs/hello_pipeline_job_with_registries.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ inputs:
88
jobs:
99
a:
1010
component: azureml://registries/testFeed/components/my_hello_world_asset_2/versions/1
11-
command: echo hello ${{inputs.hello_string}}
1211
environment: azureml://registries/testFeed/environments/sklearn-10-ubuntu2004-py38-cpu/versions/19.dev6
1312
b:
1413
command: echo "world" >> ${{outputs.world_output}}/world.txt

sdk/ml/azure-ai-ml/tests/test_configs/pipeline_jobs/pipeline_job_with_properties.yml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -141,18 +141,6 @@ jobs:
141141
properties:
142142
AZURE_ML_PathOnCompute_job_data_path: "/tmp/test"
143143

144-
# Comment these lines out as internal node is not well supported in yaml now.
145-
# node7: # internal command node with properties as a typical implement of internal base node.
146-
# type: CommandComponent
147-
# compute: azureml:cpu-cluster
148-
# component: ../internal/helloworld/helloworld_component_command.yml
149-
# inputs:
150-
# training_data: ${{parent.inputs.input_data}}
151-
# max_epochs: 10
152-
# learning_rate: 0.01
153-
# properties:
154-
# AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"
155-
156144
node8: # pipeline node with properties
157145
type: pipeline
158146
inputs:

0 commit comments

Comments
 (0)