Skip to content

Commit 82fe11f

Browse files
authored
Renyix/data import sdkv2 (Azure#29143)
* Add azureml data import verb sdk support * Create data import pipeline job * Update doc string * data import - add unit tests * data import - skip serverless compute validation * data import - fix unit tests * data import - fix DataImportSchema import build error * data import - remove unused JobOperations to fix pylint build check * add __init__ to fix build checks * data import - fix line too long build error * data import - generate recording files * Fix Run Black build check error * Refine data import job submission * Add back some data import parameters * Mark data import classes as experimental * Resolve code review comments * Resolve code review comments * Remove unused import to fix pylint * data import - update e2e test recording file * Add back @experimental for DataImport class and disable asset_name filter * Update e2e test recording file * Fix unused 'name' pylint build check * Merge load_data_import into load_data * Revert unexpected change * Remove load_data_import func and update load_data doc string * Update data import e2e test recording file * Re-enable asset_name filter now that mfe 2023-02-01-preview is in place
1 parent d8fa77f commit 82fe11f

File tree

7 files changed

+25
-56
lines changed

7 files changed

+25
-56
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
load_component,
2323
load_compute,
2424
load_data,
25-
load_data_import,
2625
load_datastore,
2726
load_environment,
2827
load_job,
@@ -55,7 +54,6 @@
5554
"load_component",
5655
"load_compute",
5756
"load_data",
58-
"load_data_import",
5957
"load_datastore",
6058
"load_model",
6159
"load_environment",

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_data_import/data_import.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,18 @@ def _load(
6767
yaml_path: Optional[Union[PathLike, str]] = None,
6868
params_override: Optional[list] = None,
6969
**kwargs,
70-
) -> "DataImport":
70+
) -> Union["Data", "DataImport"]:
7171
data = data or {}
7272
params_override = params_override or []
7373
context = {
7474
BASE_PATH_CONTEXT_KEY: Path(yaml_path).parent if yaml_path else Path("./"),
7575
PARAMS_OVERRIDE_KEY: params_override,
7676
}
77-
data_import = DataImport._load_from_dict(yaml_data=data, context=context, **kwargs)
7877

79-
return data_import
78+
if "source" in data:
79+
return DataImport._load_from_dict(yaml_data=data, context=context, **kwargs)
80+
81+
return Data._load_from_dict(yaml_data=data, context=context, **kwargs)
8082

8183
@classmethod
8284
def _load_from_dict(cls, yaml_data: Dict, context: Dict, **kwargs) -> "DataImport":

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_load_functions.py

Lines changed: 3 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ def load_data(
427427
*,
428428
relative_origin: Optional[str] = None,
429429
**kwargs,
430-
) -> Data:
430+
) -> Union[Data, DataImport]:
431431
"""Construct a Data or DataImport object from a yaml file.
432432
433433
:param source: The local yaml source of a data object. Must be either a
@@ -447,39 +447,8 @@ def load_data(
447447
:type params_override: List[Dict]
448448
:raises ~azure.ai.ml.exceptions.ValidationException: Raised if Data cannot be successfully validated.
449449
Details will be provided in the error message.
450-
:return: Constructed data object.
451-
:rtype: Data
452-
"""
453-
return load_common(Data, source, relative_origin, **kwargs)
454-
455-
456-
def load_data_import(
457-
source: Union[str, PathLike, IO[AnyStr]],
458-
*,
459-
relative_origin: Optional[str] = None,
460-
**kwargs,
461-
) -> DataImport:
462-
"""Construct a data object from yaml file.
463-
464-
:param source: The local yaml source of a data object. Must be either a
465-
path to a local file, or an already-open file.
466-
If the source is a path, it will be open and read.
467-
An exception is raised if the file does not exist.
468-
If the source is an open file, the file will be read directly,
469-
and an exception is raised if the file is not readable.
470-
:type source: Union[PathLike, str, io.TextIOWrapper]
471-
:param relative_origin: The origin to be used when deducing
472-
the relative locations of files referenced in the parsed yaml.
473-
Defaults to the inputted source's directory if it is a file or file path input.
474-
Defaults to "./" if the source is a stream input with no name value.
475-
:type relative_origin: str
476-
:param params_override: Fields to overwrite on top of the yaml file.
477-
Format is [{"field1": "value1"}, {"field2": "value2"}]
478-
:type params_override: List[Dict]
479-
:raises ~azure.ai.ml.exceptions.ValidationException: Raised if DataImport cannot be successfully validated.
480-
Details will be provided in the error message.
481-
:return: Constructed data_import object.
482-
:rtype: DataImport
450+
:return: Constructed Data or DataImport object.
451+
:rtype: Union[Data, DataImport]
483452
"""
484453
return load_common(DataImport, source, relative_origin, **kwargs)
485454

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ def show_materialization_status(
409409
"""
410410

411411
# NOTE: The 'asset_name=name' filter requires the client to be on mfe 2023-02-01-preview or above
412-
return self._job_operation.list(job_type="Pipeline", tag=name, list_view_type=list_view_type)
412+
return self._job_operation.list(job_type="Pipeline", asset_name=name, list_view_type=list_view_type)
413413

414414
# @monitor_with_activity(logger, "Data.Validate", ActivityType.INTERNALCALL)
415415
def _validate(self, data: Data) -> Union[List[str], None]:

sdk/ml/azure-ai-ml/tests/data_import/e2etests/test_data_import.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22
from devtools_testutils import AzureRecordedTestCase
33

4-
from azure.ai.ml import MLClient, load_data_import
4+
from azure.ai.ml import MLClient, load_data
55
from azure.ai.ml.entities import PipelineJob, DataImport
66
from azure.ai.ml.entities._inputs_outputs.external_data import Database
77

@@ -14,7 +14,7 @@ class TestDataImport(AzureRecordedTestCase):
1414
# Please set ML_TENANT_ID in your environment variables when recording this test.
1515
# It will help sanitize RequestBody.Studio.endpoint for the job creation request.
1616
def test_data_import(self, client: MLClient) -> None:
17-
data_import = load_data_import(
17+
data_import = load_data(
1818
source="./tests/test_configs/data_import/data_import_e2e.yaml",
1919
)
2020
pipeline_job: PipelineJob = client.data.import_data(data_import)

sdk/ml/azure-ai-ml/tests/data_import/unittests/test_data_import.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
import pytest
22

3-
from azure.ai.ml import load_data_import
3+
from azure.ai.ml import load_data
44
from azure.ai.ml.data_transfer import import_data
55
from azure.ai.ml.entities import DataImport
66
from azure.ai.ml.entities._inputs_outputs import Output
77
from azure.ai.ml.entities._inputs_outputs.external_data import Database, FileSystem
8-
from azure.ai.ml.entities._job.data_transfer.data_transfer_job import DataTransferImportJob
98

109

1110
@pytest.mark.unittest
1211
@pytest.mark.data_import_test
1312
class TestDataImport:
1413
def test_data_import_database(self):
15-
data_import1 = load_data_import(source="./tests/test_configs/data_import/data_import_database.yaml")
14+
data_import1 = load_data(source="./tests/test_configs/data_import/data_import_database.yaml")
1615
data_import2 = DataImport(
1716
name="my_azuresqldb_asset",
1817
type="mltable",
@@ -33,7 +32,7 @@ def test_data_import_database(self):
3332
assert data_import1.source.connection == data_import2.source.connection
3433

3534
def test_data_import_file_system(self):
36-
data_import = load_data_import(source="./tests/test_configs/data_import/data_import_file_system.yaml")
35+
data_import = load_data(source="./tests/test_configs/data_import/data_import_file_system.yaml")
3736
import_job = import_data(
3837
source=FileSystem(path="test1/*", connection="azureml:my_s3_connection"),
3938
outputs={

sdk/ml/azure-ai-ml/tests/recordings/data_import/e2etests/test_data_import.pyTestDataImporttest_data_import.json

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"Entries": [
33
{
4-
"RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2022-12-01-preview",
4+
"RequestUri": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2023-02-01-preview",
55
"RequestMethod": "PUT",
66
"RequestHeaders": {
77
"Accept": "application/json",
@@ -55,22 +55,22 @@
5555
"StatusCode": 201,
5656
"ResponseHeaders": {
5757
"Cache-Control": "no-cache",
58-
"Content-Length": "2431",
58+
"Content-Length": "2458",
5959
"Content-Type": "application/json; charset=utf-8",
60-
"Date": "Fri, 03 Mar 2023 00:02:58 GMT",
60+
"Date": "Sat, 04 Mar 2023 17:16:02 GMT",
6161
"Expires": "-1",
62-
"Location": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2022-12-01-preview",
62+
"Location": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2023-02-01-preview",
6363
"Pragma": "no-cache",
6464
"request-context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1",
65-
"Server-Timing": "traceparent;desc=\u002200-28e082e30a9316b00c3313740706bec6-7d8ef3f3b1d0e411-01\u0022",
65+
"Server-Timing": "traceparent;desc=\u002200-77a768c16dccf36b76f68d283a64790c-17f112882b45e4d7-01\u0022",
6666
"Strict-Transport-Security": "max-age=31536000; includeSubDomains",
67-
"x-aml-cluster": "vienna-test-westus2-02",
67+
"x-aml-cluster": "vienna-test-westus2-01",
6868
"X-Content-Type-Options": "nosniff",
69-
"x-ms-correlation-request-id": "34732156-6bf6-4c79-bc18-5e6aca254a7f",
69+
"x-ms-correlation-request-id": "cb419d93-7c09-45e8-b365-1d21cc0f8b3d",
7070
"x-ms-ratelimit-remaining-subscription-writes": "1199",
7171
"x-ms-response-type": "standard",
72-
"x-ms-routing-request-id": "WESTUS:20230303T000259Z:34732156-6bf6-4c79-bc18-5e6aca254a7f",
73-
"x-request-time": "4.607"
72+
"x-ms-routing-request-id": "WESTCENTRALUS:20230304T171603Z:cb419d93-7c09-45e8-b365-1d21cc0f8b3d",
73+
"x-request-time": "4.352"
7474
},
7575
"ResponseBody": {
7676
"id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000",
@@ -119,6 +119,7 @@
119119
"isArchived": false,
120120
"identity": null,
121121
"componentId": null,
122+
"notificationSetting": null,
122123
"jobType": "Pipeline",
123124
"settings": {
124125
"force_rerun": true,
@@ -151,7 +152,7 @@
151152
"sourceJobId": null
152153
},
153154
"systemData": {
154-
"createdAt": "2023-03-03T00:02:58.7419134\u002B00:00",
155+
"createdAt": "2023-03-04T17:16:02.9271689\u002B00:00",
155156
"createdBy": "Firstname Lastname",
156157
"createdByType": "User"
157158
}

0 commit comments

Comments
 (0)