Skip to content

Commit 404717b

Browse files
lizy14MilesHolland
andauthored
Add data/datastore mount in AzureML SDK, depending on azureml-dataprep-rslex (#31223)
* add data.mount in SDK, depending on azureml-dataprep-rslex * impl datastore.mount() * Update docstring for datastore.mount * point to azureml-dataprep-rslex whl in setup.py * Use build on test index https://msdata.visualstudio.com/Vienna/_build/results?buildId=100511334&view=results https://dataprepdownloads.azureedge.net/pypi/test-M3ME5B1GMEM3SW0W/100511334/ * from azureml.dataprep import rslex_fuse_cli * update rslex version * pass debug bool argument * add type annotation; fix datastore docstring * add TODO comment * rename rslex_fuse_cli -> rslex_fuse_subprocess_wrapper * Add azureml-dataprep-rslex to shared_requirements.txt * # cspell:ignore rslex * black reformat * support persistent on CI (TODO: call MLC) * Add mlc swagger, update autorest config * overwrite with online version * Revert "overwrite with online version" This reverts commit d91db0d. * Revert "Add mlc swagger, update autorest config" This reverts commit be24871. * [stash] manually add updateDataMounts to swagger.json * generated operation for MLC updateDataMounts * add code to call compute operation: ? * [stash] * add body type in swagger.json * [stash] update path in command * generated compute operations with body type, both w/ & wo/ aio * Add body calling MLC update_data_mounts * black format * Fix: [ComputeInstanceDataMount] * specify api_version; poll; impl datastore op * pass user specified mount_point in persistent mount * Fix default mount_point; remove mount_name TODO * amend: parity in _datastore_operations.py * credential pass-in for data/datastore mount() * Add to DatastoreOperations self._compute_operation = serviceclient_2023_04_01_preview.compute * Revert "add body type in swagger.json" This reverts commit 5b7d67f. * Revert "[stash] manually add updateDataMounts to swagger.json" This reverts commit 9c59d0d. * Revert "generated compute operations with body type, both w/ & wo/ aio" This reverts commit a835869. * Revert "generated operation for MLC updateDataMounts" This reverts commit 46ef0a7. * Revert "[stash] update path in command" This reverts commit f4a682e. * Revert "Revert "generated operation for MLC updateDataMounts"" This reverts commit 1e686c0. * Revert "[stash]" This reverts commit e0586bb. * Revert "Revert "Revert "generated operation for MLC updateDataMounts""" This reverts commit 8bbe587. * Point to 2024-01-01 client * Fix NameError: name 'experimental' is not defined * black * Update mocks for data/datastore operations * fix mock syntax error * Add fixture mock_aml_services_2024_01_01_preview * Fix fixture name mock_aml_services_2024_01_01_preview * black formatting * Fix pylint: R1723(no-else-break), W0613(unused-argument), C4739(docstring-missing-param), C4740(docstring-missing-type) * Remove comments on TBD timeout * Disable E1123(unexpected-keyword-arg) temporarily * Revert "Disable E1123(unexpected-keyword-arg) temporarily" This reverts commit d60f1e4. * point to azureml-dataprep-rslex>=2.22.0 * add rslex to spell check allowlist * ", where the 'CI_NAME' environment variable is set." Co-authored-by: MilesHolland <[email protected]> * ", where the 'CI_NAME' environment variable is set." Co-authored-by: MilesHolland <[email protected]> * Add in _data_operations.py clause ", where the 'CI_NAME' environment variable is set." * Add clause in both datastore and data ", so that you can access data ..." * Change azureml-dataprep-rslex to be an extra, operation-specific dependency * Add unit test (azureml-dataprep-rslex ImportError) * Update dummy mount path to be under /tmp * fix errors from pylint * black reformat * UT patches azureml.dataprep.rslex_fuse_subprocess_wrapper --------- Co-authored-by: MilesHolland <[email protected]>
1 parent 22dbe17 commit 404717b

File tree

18 files changed

+327
-9
lines changed

18 files changed

+327
-9
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/_ml_client.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
# Same object, but was renamed starting in v2023_08_01_preview
4141
from azure.ai.ml._restclient.v2023_10_01 import AzureMachineLearningServices as ServiceClient102023
42+
from azure.ai.ml._restclient.v2024_01_01_preview import AzureMachineLearningWorkspaces as ServiceClient012024Preview
4243
from azure.ai.ml._scope_dependent_operations import OperationConfig, OperationsContainer, OperationScope
4344
from azure.ai.ml._telemetry.logging_handler import get_appinsights_log_handler
4445
from azure.ai.ml._user_agent import USER_AGENT
@@ -407,6 +408,15 @@ def __init__(
407408
**kwargs,
408409
)
409410

411+
self._service_client_01_2024_preview = ServiceClient012024Preview(
412+
credential=self._credential,
413+
subscription_id=self._ws_operation_scope._subscription_id
414+
if registry_reference
415+
else self._operation_scope._subscription_id,
416+
base_url=base_url,
417+
**kwargs,
418+
)
419+
410420
self._workspaces = WorkspaceOperations(
411421
self._ws_operation_scope if registry_reference else self._operation_scope,
412422
self._service_client_08_2023_preview,
@@ -460,6 +470,7 @@ def __init__(
460470
operation_scope=self._operation_scope,
461471
operation_config=self._operation_config,
462472
serviceclient_2023_04_01_preview=self._service_client_04_2023_preview,
473+
serviceclient_2024_01_01_preview=self._service_client_01_2024_preview,
463474
**ops_kwargs,
464475
)
465476
self._operation_container.add(AzureMLResourceType.DATASTORE, self._datastores)
@@ -545,6 +556,7 @@ def __init__(
545556
self._operation_scope,
546557
self._operation_config,
547558
self._service_client_10_2021_dataplanepreview if registry_name else self._service_client_04_2023_preview,
559+
self._service_client_01_2024_preview,
548560
self._datastores,
549561
requests_pipeline=self._requests_pipeline,
550562
all_operations=self._operation_container,

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# pylint: disable=protected-access,no-value-for-parameter
66

77
import os
8+
import time
9+
import uuid
810
from contextlib import contextmanager
911
from pathlib import Path
1012
from typing import Any, Dict, Generator, Iterable, List, Optional, Union, cast
@@ -23,6 +25,8 @@
2325
)
2426
from azure.ai.ml._restclient.v2023_04_01_preview import AzureMachineLearningWorkspaces as ServiceClient042023_preview
2527
from azure.ai.ml._restclient.v2023_04_01_preview.models import ListViewType
28+
from azure.ai.ml._restclient.v2024_01_01_preview import AzureMachineLearningWorkspaces as ServiceClient012024_preview
29+
from azure.ai.ml._restclient.v2024_01_01_preview.models import ComputeInstanceDataMount
2630
from azure.ai.ml._scope_dependent_operations import (
2731
OperationConfig,
2832
OperationsContainer,
@@ -108,6 +112,7 @@ def __init__(
108112
operation_scope: OperationScope,
109113
operation_config: OperationConfig,
110114
service_client: Union[ServiceClient042023_preview, ServiceClient102021Dataplane],
115+
service_client_012024_preview: ServiceClient012024_preview,
111116
datastore_operations: DatastoreOperations,
112117
**kwargs: Dict,
113118
):
@@ -116,6 +121,7 @@ def __init__(
116121
self._operation = service_client.data_versions
117122
self._container_operation = service_client.data_containers
118123
self._datastore_operation = datastore_operations
124+
self._compute_operation = service_client_012024_preview.compute
119125
self._service_client = service_client
120126
self._init_kwargs = kwargs
121127
self._requests_pipeline: HttpPipeline = kwargs.pop("requests_pipeline")
@@ -738,6 +744,95 @@ def share(
738744
with self._set_registry_client(registry_name):
739745
return self.create_or_update(data_ref)
740746

747+
@monitor_with_activity(logger, "data.Mount", ActivityType.PUBLICAPI)
748+
@experimental
749+
def mount(
750+
self,
751+
path: str,
752+
mount_point: str = None,
753+
mode: str = "ro_mount",
754+
debug: bool = False,
755+
persistent: bool = False,
756+
**_kwargs,
757+
) -> None:
758+
"""Mount a data asset to a local path, so that you can access data inside it
759+
under a local path with any tools of your choice.
760+
761+
:param path: The data asset path to mount, in the form of `azureml:<name>` or `azureml:<name>:<version>`.
762+
:type path: str
763+
:param mount_point: A local path used as mount point.
764+
:type mount_point: str
765+
:param mode: Mount mode. Only `ro_mount` (read-only) is supported for data asset mount.
766+
:type mode: str
767+
:param debug: Whether to enable verbose logging.
768+
:type debug: bool
769+
:param persistent: Whether to persist the mount after reboot. Applies only when running on Compute Instance,
770+
where the 'CI_NAME' environment variable is set."
771+
:type persistent: bool
772+
:return: None
773+
"""
774+
775+
assert mode in ["ro_mount", "rw_mount"], "mode should be either `ro_mount` or `rw_mount`"
776+
read_only = mode == "ro_mount"
777+
assert read_only, "read-write mount for data asset is not supported yet"
778+
779+
ci_name = os.environ.get("CI_NAME")
780+
assert not persistent or (
781+
persistent and ci_name is not None
782+
), "persistent mount is only supported on Compute Instance, where the 'CI_NAME' environment variable is set."
783+
784+
try:
785+
from azureml.dataprep import rslex_fuse_subprocess_wrapper
786+
except ImportError as exc:
787+
raise Exception(
788+
"Mount operations requires package azureml-dataprep-rslex installed. "
789+
+ "You can install it with Azure ML SDK with `pip install azure-ai-ml[mount]`."
790+
) from exc
791+
792+
uri = rslex_fuse_subprocess_wrapper.build_data_asset_uri(
793+
self._operation_scope._subscription_id, self._resource_group_name, self._workspace_name, path
794+
)
795+
if persistent and ci_name is not None:
796+
mount_name = f"unified_mount_{str(uuid.uuid4()).replace('-', '')}"
797+
self._compute_operation.update_data_mounts(
798+
self._resource_group_name,
799+
self._workspace_name,
800+
ci_name,
801+
[
802+
ComputeInstanceDataMount(
803+
source=uri,
804+
source_type="URI",
805+
mount_name=mount_name,
806+
mount_action="Mount",
807+
mount_path=mount_point or "",
808+
)
809+
],
810+
api_version="2021-01-01",
811+
)
812+
print(f"Mount requested [name: {mount_name}]. Waiting for completion ...")
813+
while True:
814+
compute = self._compute_operation.get(self._resource_group_name, self._workspace_name, ci_name)
815+
mounts = compute.properties.properties.data_mounts
816+
try:
817+
mount = [mount for mount in mounts if mount.mount_name == mount_name][0]
818+
if mount.mount_state == "Mounted":
819+
print(f"Mounted [name: {mount_name}].")
820+
break
821+
if mount.mount_state == "MountRequested":
822+
pass
823+
elif mount.mount_state == "MountFailed":
824+
raise Exception(f"Mount failed [name: {mount_name}]: {mount.error}")
825+
else:
826+
raise Exception(f"Got unexpected mount state [name: {mount_name}]: {mount.mount_state}")
827+
except IndexError:
828+
pass
829+
time.sleep(5)
830+
831+
else:
832+
rslex_fuse_subprocess_wrapper.start_fuse_mount_subprocess(
833+
uri, mount_point, read_only, debug, credential=self._operation._config.credential
834+
)
835+
741836
@contextmanager
742837
# pylint: disable-next=docstring-missing-return,docstring-missing-rtype
743838
def _set_registry_client(self, registry_name: str) -> Generator:

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_datastore_operations.py

Lines changed: 102 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
# pylint: disable=protected-access
66

7+
import time
8+
import uuid
79
from typing import Dict, Iterable, cast
810

911
from marshmallow.exceptions import ValidationError as SchemaValidationError
@@ -12,8 +14,11 @@
1214
from azure.ai.ml._restclient.v2023_04_01_preview import AzureMachineLearningWorkspaces as ServiceClient042023Preview
1315
from azure.ai.ml._restclient.v2023_04_01_preview.models import Datastore as DatastoreData
1416
from azure.ai.ml._restclient.v2023_04_01_preview.models import DatastoreSecrets, NoneDatastoreCredentials
17+
from azure.ai.ml._restclient.v2024_01_01_preview import AzureMachineLearningWorkspaces as ServiceClient012024Preview
18+
from azure.ai.ml._restclient.v2024_01_01_preview.models import ComputeInstanceDataMount
1519
from azure.ai.ml._scope_dependent_operations import OperationConfig, OperationScope, _ScopeDependentOperations
1620
from azure.ai.ml._telemetry import ActivityType, monitor_with_activity
21+
from azure.ai.ml._utils._experimental import experimental
1722
from azure.ai.ml._utils._logger_utils import OpsLogger
1823
from azure.ai.ml.entities._datastore.datastore import Datastore
1924
from azure.ai.ml.exceptions import ValidationException
@@ -43,11 +48,13 @@ def __init__(
4348
operation_scope: OperationScope,
4449
operation_config: OperationConfig,
4550
serviceclient_2023_04_01_preview: ServiceClient042023Preview,
46-
**kwargs: Dict
51+
serviceclient_2024_01_01_preview: ServiceClient012024Preview,
52+
**kwargs: Dict,
4753
):
4854
super(DatastoreOperations, self).__init__(operation_scope, operation_config)
4955
ops_logger.update_info(kwargs)
5056
self._operation = serviceclient_2023_04_01_preview.datastores
57+
self._compute_operation = serviceclient_2024_01_01_preview.compute
5158
self._credential = serviceclient_2023_04_01_preview._config.credential
5259
self._init_kwargs = kwargs
5360

@@ -81,7 +88,7 @@ def _list_helper(datastore_resource: Datastore, include_secrets: bool) -> Datast
8188
resource_group_name=self._operation_scope.resource_group_name,
8289
workspace_name=self._workspace_name,
8390
cls=lambda objs: [_list_helper(obj, include_secrets) for obj in objs],
84-
**self._init_kwargs
91+
**self._init_kwargs,
8592
),
8693
)
8794

@@ -91,7 +98,7 @@ def _list_secrets(self, name: str) -> DatastoreSecrets:
9198
name=name,
9299
resource_group_name=self._operation_scope.resource_group_name,
93100
workspace_name=self._workspace_name,
94-
**self._init_kwargs
101+
**self._init_kwargs,
95102
)
96103

97104
@monitor_with_activity(logger, "Datastore.Delete", ActivityType.PUBLICAPI)
@@ -116,7 +123,7 @@ def delete(self, name: str) -> None:
116123
name=name,
117124
resource_group_name=self._operation_scope.resource_group_name,
118125
workspace_name=self._workspace_name,
119-
**self._init_kwargs
126+
**self._init_kwargs,
120127
)
121128

122129
@monitor_with_activity(logger, "Datastore.Get", ActivityType.PUBLICAPI)
@@ -144,7 +151,7 @@ def get(self, name: str, *, include_secrets: bool = False) -> Datastore:
144151
name=name,
145152
resource_group_name=self._operation_scope.resource_group_name,
146153
workspace_name=self._workspace_name,
147-
**self._init_kwargs
154+
**self._init_kwargs,
148155
)
149156
if include_secrets:
150157
self._fetch_and_populate_secret(datastore_resource)
@@ -182,7 +189,7 @@ def get_default(self, *, include_secrets: bool = False) -> Datastore:
182189
resource_group_name=self._operation_scope.resource_group_name,
183190
workspace_name=self._workspace_name,
184191
is_default=True,
185-
**self._init_kwargs
192+
**self._init_kwargs,
186193
).next()
187194
if include_secrets:
188195
self._fetch_and_populate_secret(datastore_resource)
@@ -223,3 +230,92 @@ def create_or_update(self, datastore: Datastore) -> Datastore:
223230
log_and_raise_error(ex)
224231
else:
225232
raise ex
233+
234+
@monitor_with_activity(logger, "Datastore.Mount", ActivityType.PUBLICAPI)
235+
@experimental
236+
def mount(
237+
self,
238+
path: str,
239+
mount_point: str = None,
240+
mode: str = "ro_mount",
241+
debug: bool = False,
242+
persistent: bool = False,
243+
**_kwargs,
244+
) -> None:
245+
"""Mount a datastore to a local path, so that you can access data inside it
246+
under a local path with any tools of your choice.
247+
248+
:param path: The data store path to mount, in the form of `<name>` or `azureml://datastores/<name>`.
249+
:type path: str
250+
:param mount_point: A local path used as mount point.
251+
:type mount_point: str
252+
:param mode: Mount mode, either `ro_mount` (read-only) or `rw_mount` (read-write).
253+
:type mode: str
254+
:param debug: Whether to enable verbose logging.
255+
:type debug: bool
256+
:param persistent: Whether to persist the mount after reboot. Applies only when running on Compute Instance,
257+
where the 'CI_NAME' environment variable is set."
258+
:type persistent: bool
259+
:return: None
260+
"""
261+
262+
assert mode in ["ro_mount", "rw_mount"], "mode should be either `ro_mount` or `rw_mount`"
263+
read_only = mode == "ro_mount"
264+
265+
import os
266+
267+
ci_name = os.environ.get("CI_NAME")
268+
assert not persistent or (
269+
persistent and ci_name is not None
270+
), "persistent mount is only supported on Compute Instance, where the 'CI_NAME' environment variable is set."
271+
272+
try:
273+
from azureml.dataprep import rslex_fuse_subprocess_wrapper
274+
except ImportError as exc:
275+
raise Exception(
276+
"Mount operations requires package azureml-dataprep-rslex installed. "
277+
+ "You can install it with Azure ML SDK with `pip install azure-ai-ml[mount]`."
278+
) from exc
279+
280+
uri = rslex_fuse_subprocess_wrapper.build_datastore_uri(
281+
self._operation_scope._subscription_id, self._resource_group_name, self._workspace_name, path
282+
)
283+
if persistent and ci_name is not None:
284+
mount_name = f"unified_mount_{str(uuid.uuid4()).replace('-', '')}"
285+
self._compute_operation.update_data_mounts(
286+
self._resource_group_name,
287+
self._workspace_name,
288+
ci_name,
289+
[
290+
ComputeInstanceDataMount(
291+
source=uri,
292+
source_type="URI",
293+
mount_name=mount_name,
294+
mount_action="Mount",
295+
mount_path=mount_point or "",
296+
)
297+
],
298+
api_version="2021-01-01",
299+
)
300+
print(f"Mount requested [name: {mount_name}]. Waiting for completion ...")
301+
while True:
302+
compute = self._compute_operation.get(self._resource_group_name, self._workspace_name, ci_name)
303+
mounts = compute.properties.properties.data_mounts
304+
try:
305+
mount = [mount for mount in mounts if mount.mount_name == mount_name][0]
306+
if mount.mount_state == "Mounted":
307+
print(f"Mounted [name: {mount_name}].")
308+
break
309+
if mount.mount_state == "MountRequested":
310+
pass
311+
elif mount.mount_state == "MountFailed":
312+
raise Exception(f"Mount failed [name: {mount_name}]: {mount.error}")
313+
else:
314+
raise Exception(f"Got unexpected mount state [name: {mount_name}]: {mount.mount_state}")
315+
except IndexError:
316+
pass
317+
time.sleep(5)
318+
else:
319+
rslex_fuse_subprocess_wrapper.start_fuse_mount_subprocess(
320+
uri, mount_point, read_only, debug, credential=self._operation._config.credential
321+
)

sdk/ml/azure-ai-ml/cspell.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"version": "0.2",
33
"ignoreWords": [
4-
"kwoa"
4+
"kwoa",
5+
"rslex"
56
]
67
}

sdk/ml/azure-ai-ml/dev_requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,5 @@ mldesigner
2121
azure-mgmt-resourcegraph<9.0.0,>=2.0.0
2222
azure-mgmt-resource<23.0.0,>=3.0.0
2323
pytest-reportlog
24-
python-dotenv
24+
python-dotenv
25+
azureml-dataprep-rslex>=2.22.0

sdk/ml/azure-ai-ml/setup.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@
9595
"designer": [
9696
"mldesigner",
9797
],
98+
# user can run `pip install azure-ai-ml[mount]` to install azureml-dataprep-rslex alone with this package
99+
# so user can call data.mount() and datastore.mount() operations supported by it.
100+
"mount": [
101+
"azureml-dataprep-rslex>=2.22.0",
102+
],
98103
},
99104
project_urls={
100105
"Bug Reports": "https://github.com/Azure/azure-sdk-for-python/issues",

sdk/ml/azure-ai-ml/tests/batch_services/unittests/test_batch_endpoints.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,13 @@ def mock_datastore_operations(
5353
mock_workspace_scope: OperationScope,
5454
mock_operation_config: OperationConfig,
5555
mock_aml_services_2023_04_01_preview: Mock,
56+
mock_aml_services_2024_01_01_preview: Mock,
5657
) -> CodeOperations:
5758
yield DatastoreOperations(
5859
operation_scope=mock_workspace_scope,
5960
operation_config=mock_operation_config,
6061
serviceclient_2023_04_01_preview=mock_aml_services_2023_04_01_preview,
62+
serviceclient_2024_01_01_preview=mock_aml_services_2024_01_01_preview,
6163
)
6264

6365

0 commit comments

Comments
 (0)