Skip to content

Commit f95583c

Browse files
authored
[ML][Pipelines]support skip-no-change (Azure#30451)
* support skip-no-change * add test * update test * update code logic * fix test * fix test * update test recording * update test recording * update log info * update pylint * update test * update recording * update recording * update recording * update recording * update recording * update recording * update recording
1 parent f92098c commit f95583c

File tree

179 files changed

+40715
-34979
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

179 files changed

+40715
-34979
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,14 @@ def _get_anonymous_hash(self) -> str:
428428
429429
same anonymous component(same code and interface) will have same name.
430430
"""
431-
component_interface_dict = self._to_dict()
432431
# omit version since anonymous component's version is random guid
433432
# omit name since name doesn't impact component's uniqueness
434-
return hash_dict(component_interface_dict, keys_to_omit=["name", "id", "version"])
433+
return self._get_component_hash(keys_to_omit=["name", "id", "version"])
434+
435+
def _get_component_hash(self, keys_to_omit=None) -> str:
436+
"""Return the hash of the component."""
437+
component_interface_dict = self._to_dict()
438+
return hash_dict(component_interface_dict, keys_to_omit=keys_to_omit)
435439

436440
@classmethod
437441
def _get_resource_type(cls) -> str:
@@ -510,14 +514,15 @@ def _to_rest_object(self) -> ComponentVersion:
510514
component_spec=component,
511515
description=self.description,
512516
is_anonymous=self._is_anonymous,
513-
properties=self.properties,
517+
properties=dict(self.properties) if self.properties else {},
514518
tags=self.tags,
515519
)
516520
result = ComponentVersion(properties=properties)
517521
if self._is_anonymous:
518522
result.name = ANONYMOUS_COMPONENT_NAME
519523
else:
520524
result.name = self.name
525+
result.properties.properties["client_component_hash"] = self._get_component_hash(keys_to_omit=["version"])
521526
return result
522527

523528
def _to_dict(self) -> Dict:

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
from inspect import Parameter, signature
1111
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
1212

13+
from azure.core.exceptions import ResourceNotFoundError
14+
1315
from azure.ai.ml._restclient.v2021_10_01_dataplanepreview import (
1416
AzureMachineLearningWorkspaces as ServiceClient102021Dataplane,
1517
)
1618
from azure.ai.ml._restclient.v2022_10_01 import AzureMachineLearningWorkspaces as ServiceClient102022
17-
from azure.ai.ml._restclient.v2022_10_01.models import ListViewType
19+
from azure.ai.ml._restclient.v2022_10_01.models import ComponentVersion, ListViewType
1820
from azure.ai.ml._scope_dependent_operations import (
1921
OperationConfig,
2022
OperationsContainer,
@@ -167,6 +169,36 @@ def list(
167169
)
168170
)
169171

172+
@monitor_with_telemetry_mixin(logger, "ComponentVersion.Get", ActivityType.INTERNALCALL)
173+
def _get_component_version(self, name: str, version: Optional[str] = DEFAULT_COMPONENT_VERSION) -> ComponentVersion:
174+
"""Returns ComponentVersion information about the specified component name and version.
175+
176+
:param name: Name of the component.
177+
:type name: str
178+
:param version: Version of the component.
179+
:type version: Optional[str]
180+
:return: The ComponentVersion object of the specified component name and version.
181+
:rtype: ~azure.ai.ml._restclient.v2022_10_01.models.ComponentVersion
182+
"""
183+
result = (
184+
self._version_operation.get(
185+
name=name,
186+
version=version,
187+
resource_group_name=self._resource_group_name,
188+
registry_name=self._registry_name,
189+
**self._init_args,
190+
)
191+
if self._registry_name
192+
else self._version_operation.get(
193+
name=name,
194+
version=version,
195+
resource_group_name=self._resource_group_name,
196+
workspace_name=self._workspace_name,
197+
**self._init_args,
198+
)
199+
)
200+
return result
201+
170202
@monitor_with_telemetry_mixin(logger, "Component.Get", ActivityType.PUBLICAPI)
171203
def get(self, name: str, version: Optional[str] = None, label: Optional[str] = None) -> Component:
172204
"""Returns information about the specified component.
@@ -201,23 +233,7 @@ def get(self, name: str, version: Optional[str] = None, label: Optional[str] = N
201233
if label:
202234
return _resolve_label_to_asset(self, name, label)
203235

204-
result = (
205-
self._version_operation.get(
206-
name=name,
207-
version=version,
208-
resource_group_name=self._resource_group_name,
209-
registry_name=self._registry_name,
210-
**self._init_args,
211-
)
212-
if self._registry_name
213-
else self._version_operation.get(
214-
name=name,
215-
version=version,
216-
resource_group_name=self._resource_group_name,
217-
workspace_name=self._workspace_name,
218-
**self._init_args,
219-
)
220-
)
236+
result = self._get_component_version(name, version)
221237
component = Component._from_rest_object(result)
222238
self._resolve_dependencies_for_pipeline_component_jobs(
223239
component,
@@ -353,6 +369,26 @@ def create_or_update(
353369
name, version = component._get_rest_name_version()
354370
rest_component_resource = component._to_rest_object()
355371
result = None
372+
try:
373+
if not component._is_anonymous and kwargs.get("skip_if_no_change"):
374+
client_component_hash = rest_component_resource.properties.properties.get("client_component_hash")
375+
remote_component_version = self._get_component_version(name=name) # will raise error if not found.
376+
remote_component_hash = remote_component_version.properties.properties.get("client_component_hash")
377+
if client_component_hash == remote_component_hash:
378+
component.version = remote_component_version.properties.component_spec.get(
379+
"version"
380+
) # only update the default version component instead of creating a new version component
381+
version = component.version
382+
rest_component_resource = component._to_rest_object()
383+
logger.warning(
384+
"The component is not modified compared to the default version "
385+
"and the new version component registration is skipped."
386+
)
387+
except ResourceNotFoundError as e:
388+
logger.info("Failed to get component version, %s", e)
389+
except Exception as e: # pylint: disable=broad-except
390+
logger.error("Failed to compare client_component_hash, %s", e)
391+
356392
try:
357393
if self._registry_name:
358394
start_time = time.time()

sdk/ml/azure-ai-ml/tests/component/e2etests/test_component.py

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,30 @@
11
import re
2+
import time
23
import uuid
34
from itertools import tee
45
from pathlib import Path
56
from typing import Callable
67

78
import pydash
89
import pytest
10+
from azure.core.exceptions import HttpResponseError
11+
from azure.core.paging import ItemPaged
12+
from devtools_testutils import AzureRecordedTestCase, is_live
13+
from test_utilities.utils import assert_job_cancel, omit_with_wildcard, sleep_if_live
14+
915
from azure.ai.ml import MLClient, MpiDistribution, load_component, load_environment
1016
from azure.ai.ml._restclient.v2022_05_01.models import ListViewType
1117
from azure.ai.ml._utils._arm_id_utils import is_ARM_id_for_resource
18+
from azure.ai.ml.constants._assets import IPProtectionLevel
1219
from azure.ai.ml.constants._common import (
1320
ANONYMOUS_COMPONENT_NAME,
1421
ARM_ID_PREFIX,
1522
PROVIDER_RESOURCE_ID_WITH_VERSION,
1623
AzureMLResourceType,
1724
)
18-
from azure.ai.ml.constants._assets import IPProtectionLevel
1925
from azure.ai.ml.dsl._utils import _sanitize_python_variable_name
2026
from azure.ai.ml.entities import CommandComponent, Component, PipelineComponent
2127
from azure.ai.ml.entities._load_functions import load_code, load_job
22-
from azure.core.exceptions import HttpResponseError
23-
from azure.core.paging import ItemPaged
24-
from devtools_testutils import AzureRecordedTestCase, is_live
25-
from test_utilities.utils import assert_job_cancel, omit_with_wildcard, sleep_if_live
2628

2729
from .._util import _COMPONENT_TIMEOUT_SECOND
2830
from ..unittests.test_component_schema import load_component_entity_from_rest_json
@@ -34,6 +36,7 @@ def create_component(
3436
path="./tests/test_configs/components/helloworld_component.yml",
3537
params_override=None,
3638
is_anonymous=False,
39+
**kwargs,
3740
):
3841
default_param_override = [{"name": component_name}]
3942
if params_override is None:
@@ -45,7 +48,7 @@ def create_component(
4548
source=path,
4649
params_override=params_override,
4750
)
48-
return client.components.create_or_update(command_component, is_anonymous=is_anonymous)
51+
return client.components.create_or_update(command_component, is_anonymous=is_anonymous, **kwargs)
4952

5053

5154
@pytest.fixture
@@ -383,7 +386,7 @@ def test_command_component_create_input_output_types(
383386
assert target_entity.id
384387
# server side will remove \n from the code now. Skip them given it's not targeted to check in this test
385388
# server side will return optional False for optional None input
386-
omit_fields = ["id", "command", "environment", "inputs.*.optional"]
389+
omit_fields = ["id", "command", "environment", "inputs.*.optional", "properties"]
387390
assert omit_with_wildcard(component_entity._to_dict(), *omit_fields) == omit_with_wildcard(
388391
target_entity._to_dict(), *omit_fields
389392
)
@@ -1026,3 +1029,53 @@ def test_ipp_component_create(self, ipp_registry_client: MLClient, randstr: Call
10261029
from_rest_component.outputs["model_output_ipp"]._intellectual_property
10271030
== command_component.outputs["model_output_ipp"]._intellectual_property
10281031
)
1032+
1033+
def test_create_component_skip_if_no_change(self, client: MLClient, randstr):
1034+
component_operation = client._operation_container.all_operations[AzureMLResourceType.COMPONENT]
1035+
component_name = "test_skip_if_no_change"
1036+
try:
1037+
default_component = component_operation.get(name=component_name)
1038+
except Exception:
1039+
default_component = None
1040+
default_version = default_component.version if default_component else "1"
1041+
# update default component by current local component data.
1042+
default_component = create_component(client, component_name=component_name, version=default_version)
1043+
1044+
# test component has no change and use skip_if_no_change parameter
1045+
new_version = randstr("component_version")
1046+
new_component = create_component(
1047+
client, component_name=component_name, version=new_version, skip_if_no_change=True
1048+
)
1049+
assert default_component._get_component_hash(
1050+
keys_to_omit=["creation_context"]
1051+
) == new_component._get_component_hash(keys_to_omit=["creation_context"])
1052+
1053+
# test component has change and use skip_if_no_change parameter
1054+
new_version = randstr("component_version")
1055+
params_override = [
1056+
{"description": "description_{0}".format(new_version)},
1057+
{"display_name": "display_name_{0}".format(new_version)},
1058+
{"tags": {"tags": "tags_{0}".format(new_version)}},
1059+
]
1060+
new_component = create_component(
1061+
client,
1062+
component_name=component_name,
1063+
version=new_version,
1064+
params_override=params_override,
1065+
skip_if_no_change=True,
1066+
)
1067+
assert default_component._get_component_hash(
1068+
keys_to_omit=["creation_context"]
1069+
) != new_component._get_component_hash(keys_to_omit=["creation_context"])
1070+
assert new_component.description == "description_{0}".format(new_version)
1071+
assert new_component.display_name == "display_name_{0}".format(new_version)
1072+
assert new_component.tags == {"tags": "tags_{0}".format(new_version)}
1073+
assert new_component.version == new_version
1074+
1075+
# test component has no change and not use skip_if_no_change parameter
1076+
new_version = randstr("component_version")
1077+
new_component = create_component(client, component_name=component_name, version=new_version)
1078+
assert default_component._get_component_hash(
1079+
keys_to_omit=["creation_context"]
1080+
) != new_component._get_component_hash(keys_to_omit=["creation_context"])
1081+
assert new_component.version == new_version

sdk/ml/azure-ai-ml/tests/component/e2etests/test_component_hash.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def create_component(
3939
@pytest.mark.usefixtures(
4040
"recorded_test",
4141
"mock_asset_name",
42+
"mock_component_hash",
4243
)
4344
@pytest.mark.pipeline_test
4445
class TestComponentHash(AzureRecordedTestCase):

sdk/ml/azure-ai-ml/tests/component/unittests/test_command_component_entity.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,26 @@
1+
import enum
12
import os
23
import sys
4+
import tempfile
35
from io import StringIO
4-
import enum
56
from pathlib import Path
6-
import tempfile
77
from unittest.mock import patch
88
from zipfile import ZipFile
99

1010
import pydash
1111
import pytest
12-
13-
from conftest import normalized_arm_id_in_object
14-
from test_utilities.utils import verify_entity_load_and_dump, build_temp_folder
12+
from test_utilities.utils import build_temp_folder, verify_entity_load_and_dump
1513

1614
from azure.ai.ml import Input, MpiDistribution, Output, TensorFlowDistribution, command, load_component
1715
from azure.ai.ml._utils.utils import load_yaml
1816
from azure.ai.ml.constants._common import AzureMLResourceType
19-
from azure.ai.ml.entities import Component, CommandComponent, CommandJobLimits, JobResourceConfiguration
20-
from azure.ai.ml.entities._assets import Code
21-
from azure.ai.ml.entities._assets import Environment
17+
from azure.ai.ml.entities import CommandComponent, CommandJobLimits, Component, JobResourceConfiguration
18+
from azure.ai.ml.entities._assets import Code, Environment
2219
from azure.ai.ml.entities._builders import Command, Sweep
2320
from azure.ai.ml.entities._job.pipeline._io import PipelineInput
2421
from azure.ai.ml.exceptions import UnexpectedKeywordError, ValidationException
2522
from azure.ai.ml.sweep import Choice
23+
from conftest import normalized_arm_id_in_object
2624

2725
from .._util import _COMPONENT_TIMEOUT_SECOND
2826

@@ -92,7 +90,11 @@ def test_command_component_entity(self):
9290
environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:33",
9391
)
9492
component_dict = component._to_rest_object().as_dict()
95-
omits = ["properties.component_spec.$schema", "properties.component_spec._source"]
93+
omits = [
94+
"properties.component_spec.$schema",
95+
"properties.component_spec._source",
96+
"properties.properties.client_component_hash",
97+
]
9698
component_dict = pydash.omit(component_dict, *omits)
9799

98100
yaml_path = "./tests/test_configs/components/basic_component_code_arm_id.yml"
@@ -235,13 +237,15 @@ def test_command_component_instance_count(self):
235237
"properties.component_spec.distribution.added_property",
236238
"properties.component_spec.resources.properties",
237239
"properties.component_spec._source",
240+
"properties.properties.client_component_hash",
238241
)
239242
yaml_component_dict = pydash.omit(
240243
yaml_component_dict,
241244
"properties.component_spec.$schema",
242245
"properties.component_spec.distribution.added_property",
243246
"properties.component_spec.resources.properties",
244247
"properties.component_spec._source",
248+
"properties.properties.client_component_hash",
245249
)
246250
assert component_dict == yaml_component_dict
247251

sdk/ml/azure-ai-ml/tests/component/unittests/test_data_transfer_component_entity.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,16 @@
11
import pydash
22
import pytest
33

4-
from azure.ai.ml import MLClient
5-
from azure.ai.ml import load_component
4+
from azure.ai.ml import MLClient, load_component
5+
from azure.ai.ml.constants._component import DataCopyMode, DataTransferTaskType
66
from azure.ai.ml.entities._component.datatransfer_component import (
77
DataTransferCopyComponent,
8-
DataTransferImportComponent,
98
DataTransferExportComponent,
9+
DataTransferImportComponent,
1010
)
11-
from azure.ai.ml.constants._component import DataCopyMode, DataTransferTaskType
12-
from .test_component_schema import (
13-
load_component_entity_from_rest_json,
14-
load_component_entity_from_yaml,
15-
)
11+
1612
from .._util import _COMPONENT_TIMEOUT_SECOND
13+
from .test_component_schema import load_component_entity_from_rest_json, load_component_entity_from_yaml
1714

1815

1916
@pytest.mark.timeout(_COMPONENT_TIMEOUT_SECOND)
@@ -148,6 +145,7 @@ def test_copy_task_component_entity(self):
148145
omit_fields = [
149146
"properties.component_spec.$schema",
150147
"properties.component_spec._source",
148+
"properties.properties.client_component_hash",
151149
]
152150
component._validate()
153151
component_dict = component._to_rest_object().as_dict()

sdk/ml/azure-ai-ml/tests/component/unittests/test_parallel_component_entity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def test_parallel_component_entity(self):
6868
"properties.component_spec.$schema",
6969
"properties.component_spec.inputs",
7070
"properties.component_spec._source",
71+
"properties.properties.client_component_hash",
7172
]
7273
component_dict = component._to_rest_object().as_dict()
7374
component_dict = pydash.omit(component_dict, *omit_fields)

sdk/ml/azure-ai-ml/tests/component/unittests/test_spark_component_entity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def test_spark_component_entity(self):
6161
omit_fields = [
6262
"properties.component_spec.$schema",
6363
"properties.component_spec._source",
64+
"properties.properties.client_component_hash",
6465
]
6566
component_dict = component._to_rest_object().as_dict()
6667
component_dict = pydash.omit(component_dict, *omit_fields)

0 commit comments

Comments
 (0)