Skip to content

Commit 596b2c0

Browse files
YAML signing (Azure#38855)
* first draft: YAML signing * YAML signing * component ops: adding prepare_for_sign * resolving comments * fixing pylint and black
1 parent cf28b06 commit 596b2c0

File tree

5 files changed

+208
-15
lines changed

5 files changed

+208
-15
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import hashlib
88
import logging
99
import os
10+
import json
1011
import uuid
1112
import warnings
1213
from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -15,7 +16,17 @@
1516
from os import PathLike
1617
from pathlib import Path
1718
from platform import system
18-
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast
19+
from typing import (
20+
TYPE_CHECKING,
21+
Any,
22+
Dict,
23+
Iterable,
24+
List,
25+
Optional,
26+
Tuple,
27+
Union,
28+
cast,
29+
)
1930

2031
from colorama import Fore
2132
from tqdm import TqdmWarning, tqdm
@@ -56,7 +67,11 @@
5667
from azure.ai.ml._restclient.v2023_04_01.models import PendingUploadRequestDto
5768
from azure.ai.ml._utils._pathspec import GitWildMatchPattern, normalize_file
5869
from azure.ai.ml._utils.utils import convert_windows_path_to_unix, retry, snake_to_camel
59-
from azure.ai.ml.constants._common import MAX_AUTOINCREMENT_ATTEMPTS, DefaultOpenEncoding, OrderString
70+
from azure.ai.ml.constants._common import (
71+
MAX_AUTOINCREMENT_ATTEMPTS,
72+
DefaultOpenEncoding,
73+
OrderString,
74+
)
6075
from azure.ai.ml.entities._assets.asset import Asset
6176
from azure.ai.ml.exceptions import (
6277
AssetPathException,
@@ -247,6 +262,33 @@ def _get_file_hash(filename: Union[str, os.PathLike], _hash: hash_type) -> hash_
247262
return _hash
248263

249264

265+
def delete_two_catalog_files(path: Union[str, os.PathLike]) -> None:
    """Delete the "catalog.json" and "catalog.json.sig" files located at 'path', if they exist.

    A warning is logged for each file that is found before it is removed.

    :param path: Path to the folder for signing
    :type path: Union[Path, str]
    :return: None
    """
    for catalog_name in ("catalog.json", "catalog.json.sig"):
        catalog_path = os.path.join(path, catalog_name)
        if not os.path.exists(catalog_path):
            continue
        module_logger.warning("%s already exists. Deleting it", catalog_path)
        try:
            os.remove(catalog_path)
        except FileNotFoundError:
            # The file vanished between the existence check and the removal
            # (e.g. a concurrent cleanup); the desired end state is already reached.
            pass
283+
284+
285+
def create_catalog_files(path, json_stub):
    """Write 'json_stub' as JSON to "catalog.json" and "catalog.json.sig" inside 'path'.

    Both files receive the same serialized content; existing files are overwritten.

    :param path: Folder in which the two catalog files are created
    :param json_stub: JSON-serializable catalog content
    :return: None
    """
    for catalog_name in ("catalog.json", "catalog.json.sig"):
        catalog_path = os.path.join(path, catalog_name)
        with open(catalog_path, "w", encoding=DefaultOpenEncoding.WRITE) as catalog_file:
            json.dump(json_stub, catalog_file)
290+
291+
250292
def _get_dir_hash(directory: Union[str, os.PathLike], _hash: hash_type, ignore_file: IgnoreFile) -> hash_type:
251293
dir_contents = Path(directory).iterdir()
252294
sorted_contents = sorted(dir_contents, key=lambda path: str(path).lower())
@@ -349,7 +391,10 @@ def get_content_hash(path: Union[str, os.PathLike], ignore_file: IgnoreFile = Ig
349391

350392

351393
def get_upload_files_from_folder(
352-
path: Union[str, os.PathLike], *, prefix: str = "", ignore_file: IgnoreFile = IgnoreFile()
394+
path: Union[str, os.PathLike],
395+
*,
396+
prefix: str = "",
397+
ignore_file: IgnoreFile = IgnoreFile(),
353398
) -> List[str]:
354399
path = Path(path)
355400
upload_paths = []
@@ -432,7 +477,12 @@ def traverse_directory( # pylint: disable=unused-argument
432477
result = []
433478
for origin_file_path in origin_file_paths:
434479
relative_path = origin_file_path.relative_to(root)
435-
result.append((_resolve_path(origin_file_path).as_posix(), Path(prefix).joinpath(relative_path).as_posix()))
480+
result.append(
481+
(
482+
_resolve_path(origin_file_path).as_posix(),
483+
Path(prefix).joinpath(relative_path).as_posix(),
484+
)
485+
)
436486
return result
437487

438488

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,48 @@
44

55
# pylint: disable=protected-access,too-many-lines
66
import time
7+
import collections
78
import types
89
from functools import partial
910
from inspect import Parameter, signature
1011
from os import PathLike
1112
from pathlib import Path
1213
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
14+
import hashlib
1315

1416
from azure.ai.ml._restclient.v2021_10_01_dataplanepreview import (
1517
AzureMachineLearningWorkspaces as ServiceClient102021Dataplane,
1618
)
17-
from azure.ai.ml._restclient.v2024_01_01_preview import AzureMachineLearningWorkspaces as ServiceClient012024
18-
from azure.ai.ml._restclient.v2024_01_01_preview.models import ComponentVersion, ListViewType
19+
from azure.ai.ml._restclient.v2024_01_01_preview import (
20+
AzureMachineLearningWorkspaces as ServiceClient012024,
21+
)
22+
from azure.ai.ml._restclient.v2024_01_01_preview.models import (
23+
ComponentVersion,
24+
ListViewType,
25+
)
1926
from azure.ai.ml._scope_dependent_operations import (
2027
OperationConfig,
2128
OperationsContainer,
2229
OperationScope,
2330
_ScopeDependentOperations,
2431
)
25-
from azure.ai.ml._telemetry import ActivityType, monitor_with_activity, monitor_with_telemetry_mixin
32+
from azure.ai.ml._telemetry import (
33+
ActivityType,
34+
monitor_with_activity,
35+
monitor_with_telemetry_mixin,
36+
)
2637
from azure.ai.ml._utils._asset_utils import (
2738
_archive_or_restore,
2839
_create_or_update_autoincrement,
40+
_get_file_hash,
2941
_get_latest,
3042
_get_next_version_from_container,
3143
_resolve_label_to_asset,
44+
get_ignore_file,
45+
get_upload_files_from_folder,
46+
IgnoreFile,
47+
delete_two_catalog_files,
48+
create_catalog_files,
3249
)
3350
from azure.ai.ml._utils._azureml_polling import AzureMLPolling
3451
from azure.ai.ml._utils._endpoint_utils import polling_wait
@@ -42,7 +59,12 @@
4259
LROConfigurations,
4360
)
4461
from azure.ai.ml.entities import Component, ValidationResult
45-
from azure.ai.ml.exceptions import ComponentException, ErrorCategory, ErrorTarget, ValidationException
62+
from azure.ai.ml.exceptions import (
63+
ComponentException,
64+
ErrorCategory,
65+
ErrorTarget,
66+
ValidationException,
67+
)
4668
from azure.core.exceptions import HttpResponseError, ResourceNotFoundError
4769

4870
from .._utils._cache_utils import CachedNodeResolver
@@ -282,7 +304,8 @@ def _localize_code(self, component: Component, base_dir: Path) -> None:
282304

283305
target_code_value = "./code"
284306
self._code_operations.download(
285-
**extract_name_and_version(code), download_path=base_dir.joinpath(target_code_value)
307+
**extract_name_and_version(code),
308+
download_path=base_dir.joinpath(target_code_value),
286309
)
287310

288311
setattr(component, component._get_code_field_name(), target_code_value)
@@ -311,7 +334,13 @@ def _localize_environment(self, component: Component, base_dir: Path) -> None:
311334

312335
@experimental
313336
@monitor_with_telemetry_mixin(ops_logger, "Component.Download", ActivityType.PUBLICAPI)
314-
def download(self, name: str, download_path: Union[PathLike, str] = ".", *, version: Optional[str] = None) -> None:
337+
def download(
338+
self,
339+
name: str,
340+
download_path: Union[PathLike, str] = ".",
341+
*,
342+
version: Optional[str] = None,
343+
) -> None:
315344
"""Download the specified component and its dependencies to local. Local component can be used to create
316345
the component in another workspace or for offline development.
317346
@@ -491,7 +520,11 @@ def _reset_version_if_no_change(self, component: Component, current_name: str, c
491520
return current_version, rest_component_resource
492521

493522
def _create_or_update_component_version(
494-
self, component: Component, name: str, version: Optional[str], rest_component_resource: Any
523+
self,
524+
component: Component,
525+
name: str,
526+
version: Optional[str],
527+
rest_component_resource: Any,
495528
) -> Any:
496529
try:
497530
if self._registry_name:
@@ -652,6 +685,28 @@ def create_or_update(
652685
)
653686
return component
654687

688+
@experimental
def prepare_for_sign(self, component: Component) -> None:
    """Generate the signing catalog files inside a component's code folder.

    For a component that is a ``ComponentCodeMixin``, any existing "catalog.json" and
    "catalog.json.sig" files in the code folder are deleted, a SHA256 hash is computed
    for every file returned by ``get_upload_files_from_folder``, and the resulting
    catalog (file name -> upper-case hex digest, sorted by file name) is written back
    to the code folder as both catalog files. Other components are left untouched.

    :param component: The component whose code folder should be prepared for signing
    :type component: Component
    :return: None
    """
    if not isinstance(component, ComponentCodeMixin):
        return

    # NOTE(review): assumes _build_code() always yields a code asset with a local
    # `path` - confirm for components without local code.
    with component._build_code() as code:
        delete_two_catalog_files(code.path)

        # Prefer the ignore file already attached to the code asset; only fall back to
        # discovering one in the folder when none is attached. Previously an attached
        # ignore file was replaced by an empty IgnoreFile(), so ignored files were hashed.
        ignore_file: IgnoreFile = get_ignore_file(code.path) if code._ignore_file is None else code._ignore_file
        file_list = get_upload_files_from_folder(code.path, ignore_file=ignore_file)

        catalog_items: Dict[str, str] = {}
        for file_path, file_name in file_list:
            catalog_items[file_name] = _get_file_hash(file_path, hashlib.sha256()).hexdigest().upper()

        # The catalog order is fixed by this single sort on file name, so the
        # file list itself does not need to be pre-sorted.
        json_stub: Dict[str, Any] = {
            "HashAlgorithm": "SHA256",
            "CatalogItems": collections.OrderedDict(sorted(catalog_items.items())),
        }
        create_catalog_files(code.path, json_stub)
709+
655710
@monitor_with_telemetry_mixin(ops_logger, "Component.Archive", ActivityType.PUBLICAPI)
656711
def archive(
657712
self,
@@ -860,7 +915,9 @@ def _resolve_binding_on_supported_fields_for_node(cls, node: BaseNode) -> None:
860915
:param node: The node
861916
:type node: BaseNode
862917
"""
863-
from azure.ai.ml.entities._job.pipeline._attr_dict import try_get_non_arbitrary_attr
918+
from azure.ai.ml.entities._job.pipeline._attr_dict import (
919+
try_get_non_arbitrary_attr,
920+
)
864921
from azure.ai.ml.entities._job.pipeline._io import PipelineInput
865922

866923
# compute binding to pipeline input is supported on node.
@@ -968,7 +1025,9 @@ def _try_resolve_compute_for_node(cls, node: BaseNode, _: str, resolver: _AssetR
9681025

9691026
@classmethod
9701027
def _divide_nodes_to_resolve_into_layers(
971-
cls, component: PipelineComponent, extra_operations: List[Callable[[BaseNode, str], Any]]
1028+
cls,
1029+
component: PipelineComponent,
1030+
extra_operations: List[Callable[[BaseNode, str], Any]],
9721031
) -> List:
9731032
"""Traverse the pipeline component and divide nodes to resolve into layers. Note that all leaf nodes will be
9741033
put in the last layer.
@@ -1029,7 +1088,8 @@ def _divide_nodes_to_resolve_into_layers(
10291088
def _get_workspace_key(self) -> str:
10301089
try:
10311090
workspace_rest = self._workspace_operations._operation.get(
1032-
resource_group_name=self._resource_group_name, workspace_name=self._workspace_name
1091+
resource_group_name=self._resource_group_name,
1092+
workspace_name=self._workspace_name,
10331093
)
10341094
return str(workspace_rest.workspace_id)
10351095
except HttpResponseError:
@@ -1099,7 +1159,10 @@ def _resolve_dependencies_for_pipeline_component_jobs(
10991159
extra_operations=[
11001160
# no need to do this as we now keep the original component name for anonymous components
11011161
# self._set_default_display_name_for_anonymous_component_in_node,
1102-
partial(self._try_resolve_node_level_task_for_parallel_node, resolver=resolver),
1162+
partial(
1163+
self._try_resolve_node_level_task_for_parallel_node,
1164+
resolver=resolver,
1165+
),
11031166
partial(self._try_resolve_environment_for_component, resolver=resolver),
11041167
partial(self._try_resolve_compute_for_node, resolver=resolver),
11051168
# should we resolve code here after we do extra operations concurrently?
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import argparse
import os
from datetime import datetime


def main(argv=None):
    """Echo every file in the input folder and write a timestamped log file to the output folder.

    :param argv: Command-line arguments to parse; ``None`` makes argparse read ``sys.argv``.
    :return: None
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--componentB_input", type=str)
    parser.add_argument("--componentB_output", type=str)

    # Greeting is printed before parsing so it appears even when argparse exits (e.g. --help).
    print("Hello Python World...\nI'm componentB :-)")

    args = parser.parse_args(argv)

    print(f"componentB_input path: {args.componentB_input}")
    print(f"componentB_output path: {args.componentB_output}")

    print("files in input path: ")
    arr = os.listdir(args.componentB_input)
    print(arr)

    for filename in arr:
        print(f"reading file: {filename} ...")
        with open(os.path.join(args.componentB_input, filename), "r", encoding="utf-8") as handle:
            print(handle.read())

    cur_time_str = datetime.now().strftime("%b-%d-%Y-%H-%M-%S")

    # Build the output path once so the logged path and the written path cannot diverge.
    output_file = os.path.join(args.componentB_output, "file-" + cur_time_str + ".txt")
    print(f"Writing file: {output_file}")
    with open(output_file, "wt", encoding="utf-8") as text_file:
        print(f"Logging date time: {cur_time_str}", file=text_file)


if __name__ == "__main__":
    main()
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
2+
code: ../src
3+
command: >-
4+
python main.py train_check --config ${{inputs.data}}/model.yaml --train ${{inputs.data}}/train.csv --sanity-check ${{inputs.data}}/sanity_check.csv --min-accuracy 0.99 --min-precision 0.95 --min-recall 0.95 --model-dir ${{outputs.model}}
5+
inputs:
6+
data:
7+
path: .
8+
mode: download
9+
outputs:
10+
model:
11+
type: uri_folder
12+
environment:
13+
image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04
14+
conda_file: ../src/environment.yml
15+
environment_variables:
16+
AZUREML_COMMON_RUNTIME_USE_SBOM_CAPABILITY: "true"
17+
compute: azureml:gpu-t4-spot-vpn
18+
display_name: Compete
19+
experiment_name: sensei-compete
20+
description: Sensei Compete Model
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from azure.identity import (
2+
DefaultAzureCredential,
3+
AzureCliCredential,
4+
InteractiveBrowserCredential,
5+
)
6+
from azure.ai.ml import MLClient, load_job
7+
from azure.ai.ml.entities import Data, ManagedOnlineEndpoint, Job, CommandComponent
8+
from azure.ai.ml.sweep import SweepJob, GridSamplingAlgorithm, Choice, Objective
9+
from azure.ai.ml import command
10+
from azure.ai.ml.constants import AssetTypes
11+
from azure.ai.ml.entities._load_functions import load_component
12+
13+
import os

# Workspace coordinates. The environment variables below are introduced by this script
# so it can be pointed at another workspace without editing the file; the original
# hard-coded values remain the defaults.
subscription_id = os.environ.get("ML_SUBSCRIPTION_ID", "2d385bf4-0756-4a76-aa95-28bf9ed3b625")
resource_group = os.environ.get("ML_RESOURCE_GROUP", "sdkv2-20240925-rg")
workspace_name = os.environ.get("ML_WORKSPACE_NAME", "sdkv2-20240925-ws")

# Component YAML to prepare for signing. The default is a developer-local Windows path,
# so set ML_COMPONENT_YAML when running on any other machine.
component_yaml = os.environ.get(
    "ML_COMPONENT_YAML",
    "C:\\Projects\\azure-sdk-for-python\\sdk\\ml\\azure-ai-ml\\azure\\ai\\ml\\YAMLsigning\\sum1.yaml",
)

credential = DefaultAzureCredential()

print(credential)
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

# Load the component spec and write catalog.json / catalog.json.sig into its code folder.
component = load_component(component_yaml)
ml_client.components.prepare_for_sign(component)

0 commit comments

Comments
 (0)