Skip to content

Commit f799c76

Browse files
Add support for promoting data asset from a workspace to a registry (Azure#28866)
* Support list/show and create data asset in a registry --------- Co-authored-by: Valerie Pham <[email protected]>
1 parent a2db262 commit f799c76

File tree

4 files changed

+113
-3
lines changed

4 files changed

+113
-3
lines changed

sdk/ml/azure-ai-ml/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
### Features Added
66
- Added support for `tags` on Compute Resources.
7+
- Added support for promoting a data asset from a workspace to a registry.
78

89
### Bugs Fixed
910

sdk/ml/azure-ai-ml/azure/ai/ml/constants/_common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
)
103103
ASSET_ID_RESOURCE_REGEX_FORMAT = "azureml://resource[gG]roups/([^/]+)/workspaces/([^/]+)/([^/]+)/([^/]+)/versions/(.+)"
104104
MODEL_ID_REGEX_FORMAT = "azureml://models/([^/]+)/versions/(.+)"
105+
DATA_ID_REGEX_FORMAT = "azureml://data/([^/]+)/versions/(.+)"
105106
ASSET_ID_URI_REGEX_FORMAT = "azureml://locations/([^/]+)/workspaces/([^/]+)/([^/]+)/([^/]+)/versions/(.+)"
106107
AZUREML_CLI_SYSTEM_EXECUTED_ENV_VAR = "AZUREML_CLI_SYSTEM_EXECUTED"
107108
DOCSTRING_TEMPLATE = ".. note:: {0} {1}\n\n"

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,13 @@
4545
get_sas_uri_for_registry_asset,
4646
)
4747
from azure.ai.ml._utils.utils import is_url
48-
from azure.ai.ml.constants._common import MLTABLE_METADATA_SCHEMA_URL_FALLBACK, AssetTypes
49-
from azure.ai.ml.entities._assets import Data
48+
from azure.ai.ml.constants._common import (
49+
MLTABLE_METADATA_SCHEMA_URL_FALLBACK,
50+
AssetTypes,
51+
ASSET_ID_FORMAT,
52+
AzureMLResourceType,
53+
)
54+
from azure.ai.ml.entities._assets import Data, WorkspaceAssetReference
5055
from azure.ai.ml.entities._data.mltable_metadata import MLTableMetadata
5156
from azure.ai.ml.exceptions import (
5257
AssetPathException,
@@ -58,6 +63,7 @@
5863
from azure.ai.ml.operations._datastore_operations import DatastoreOperations
5964
from azure.core.exceptions import HttpResponseError
6065
from azure.core.paging import ItemPaged
66+
from azure.core.exceptions import ResourceNotFoundError
6167

6268
ops_logger = OpsLogger(__name__)
6369
module_logger = ops_logger.module_logger
@@ -230,7 +236,6 @@ def create_or_update(self, data: Data) -> Data:
230236
:return: Data asset object.
231237
:rtype: ~azure.ai.ml.entities.Data
232238
"""
233-
234239
try:
235240
name = data.name
236241
if not data.version and self._registry_name:
@@ -246,6 +251,34 @@ def create_or_update(self, data: Data) -> Data:
246251

247252
sas_uri = None
248253
if self._registry_name:
254+
# If the data asset is a workspace asset, promote to registry
255+
if isinstance(data, WorkspaceAssetReference):
256+
try:
257+
self._operation.get(
258+
name=data.name,
259+
version=data.version,
260+
resource_group_name=self._resource_group_name,
261+
registry_name=self._registry_name,
262+
)
263+
except Exception as err: # pylint: disable=broad-except
264+
if isinstance(err, ResourceNotFoundError):
265+
pass
266+
else:
267+
raise err
268+
else:
269+
msg = "An data asset with this name and version already exists in registry"
270+
raise ValidationException(
271+
message=msg,
272+
no_personal_data_message=msg,
273+
target=ErrorTarget.DATA,
274+
error_category=ErrorCategory.USER_ERROR,
275+
)
276+
data = data._to_rest_object()
277+
result = self._service_client.resource_management_asset_reference.begin_import_method(
278+
resource_group_name=self._resource_group_name, registry_name=self._registry_name, body=data
279+
)
280+
return result
281+
249282
sas_uri = get_sas_uri_for_registry_asset(
250283
service_client=self._service_client,
251284
name=name,
@@ -445,6 +478,43 @@ def _get_latest_version(self, name: str) -> Data:
445478
)
446479
return self.get(name, version=latest_version)
447480

481+
# pylint: disable=no-self-use
482+
def _prepare_to_copy(
    self, data: Data, name: Optional[str] = None, version: Optional[str] = None
) -> WorkspaceAssetReference:
    """Build the reference used to copy a workspace data asset to a registry.

    The source asset is identified by an asset ID formatted from the
    workspace location, the workspace GUID, the data resource type and the
    asset's own name and version.

    :param data: Registered data
    :type data: Data
    :param name: Destination name; the source asset's name is used when this is
        not provided (or is empty)
    :type name: str
    :param version: Destination version; the source asset's version is used when
        this is not provided (or is empty)
    :type version: str
    :return: Reference carrying the destination name/version and the source asset ID
    :rtype: WorkspaceAssetReference
    """
    # The asset ID is keyed on the workspace GUID and location, so the
    # workspace has to be fetched from the service first.
    source_workspace = self._service_client.workspaces.get(
        resource_group_name=self._resource_group_name,
        workspace_name=self._workspace_name,
    )
    guid = source_workspace.workspace_id
    location = source_workspace.location

    # ID of the data asset as it exists in the source workspace.
    source_asset_id = ASSET_ID_FORMAT.format(
        location,
        guid,
        AzureMLResourceType.DATA,
        data.name,
        data.version,
    )

    return WorkspaceAssetReference(
        name=name or data.name,
        version=version or data.version,
        asset_id=source_asset_id,
    )
517+
448518

449519
def _assert_local_path_matches_asset_type(
450520
local_path: str,

sdk/ml/azure-ai-ml/tests/dataset/unittests/test_data_operations.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from azure.ai.ml.exceptions import ErrorTarget
2525
from azure.ai.ml.operations import DataOperations, DatastoreOperations
2626
from azure.core.paging import ItemPaged
27+
from azure.core.exceptions import ResourceNotFoundError
2728
from unittest.mock import ANY
2829

2930

@@ -562,3 +563,40 @@ def test_create_with_datastore(
562563
show_progress=True,
563564
ignore_file=None,
564565
)
566+
567+
def test_promote_data_from_workspace(
    self, mock_data_operations_in_registry: DataOperations, mock_data_operations: DataOperations, tmp_path: Path
) -> None:
    """Promote a workspace data asset to a registry and check the import call is issued.

    The workspace-scoped operations build the ``WorkspaceAssetReference`` via
    ``_prepare_to_copy``; the registry-scoped operations then receive it through
    ``create_or_update``. The registry lookup is made to raise
    ``ResourceNotFoundError`` so that the asset is treated as not yet existing
    in the registry.
    """
    data_asset_name = "data_random_string"
    p = tmp_path / "data_full.yml"
    data_path = tmp_path / "data.pkl"
    data_path.write_text("hello world")
    p.write_text(
        f"""
name: {data_asset_name}
path: ./data.pkl
version: 3"""
    )

    with patch(
        "azure.ai.ml._artifacts._artifact_utilities._upload_to_datastore",
        return_value=ArtifactStorageInfo(
            name=data_asset_name,
            version="3",
            relative_path="path",
            datastore_arm_id="/subscriptions/mock/resourceGroups/mock/providers/Microsoft.MachineLearningServices/workspaces/mock/datastores/datastore_id",
            container_name="containerName",
        ),
    ), patch(
        "azure.ai.ml.operations._data_operations.Data._from_rest_object",
        return_value=Data(),
    ):
        data = load_data(source=p)

        # Explicit destination name/version must override the source asset's own.
        data_to_promote = mock_data_operations._prepare_to_copy(data, "new_name", "new_version")
        assert data_to_promote.name == "new_name"
        assert data_to_promote.version == "new_version"

        # An exception instance assigned to side_effect is raised when the mock
        # is called, which simulates "asset not found in registry".
        mock_data_operations_in_registry._operation.get.side_effect = ResourceNotFoundError("Test")
        mock_data_operations_in_registry.create_or_update(data_to_promote)
        mock_data_operations_in_registry._service_client.resource_management_asset_reference.begin_import_method.assert_called_once()

0 commit comments

Comments
 (0)