Skip to content

Commit 0e883ff

Browse files
authored
[AutoML Image] added image search space schema changes from other repo (Azure#26364)
* Added image search space schema changes from the other repo * Fixed the `az ml job show` validation error raised when an AutoML image job is executed inside a pipeline
1 parent d05374e commit 0e883ff

13 files changed

+1078
-666
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/_schema/automl/image_vertical/image_model_distribution_settings.py

Lines changed: 145 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66

77
from marshmallow import fields, post_dump, post_load, pre_load
88

9+
from azure.ai.ml._restclient.v2022_06_01_preview.models import (
10+
LearningRateScheduler,
11+
ModelSize,
12+
StochasticOptimizer,
13+
ValidationMetricType,
14+
)
915
from azure.ai.ml._schema._sweep.search_space import (
1016
ChoiceSchema,
1117
NormalSchema,
@@ -14,12 +20,39 @@
1420
RandintSchema,
1521
UniformSchema,
1622
)
17-
from azure.ai.ml._schema.core.fields import NestedField, UnionField
23+
from azure.ai.ml._schema.core.fields import (
24+
DumpableIntegerField,
25+
DumpableStringField,
26+
NestedField,
27+
StringTransformedEnum,
28+
UnionField,
29+
)
1830
from azure.ai.ml._schema.core.schema import PatchedSchemaMeta
31+
from azure.ai.ml._utils.utils import camel_to_snake
32+
from azure.ai.ml.constants._job.automl import (
33+
ImageClassificationModelNames,
34+
ImageInstanceSegmentationModelNames,
35+
ImageObjectDetectionModelNames,
36+
)
37+
38+
39+
def get_choice_schema_of_type(cls, **kwargs):
40+
class CustomChoiceSchema(ChoiceSchema):
41+
values = fields.List(cls(**kwargs))
42+
43+
return CustomChoiceSchema()
44+
1945

20-
SEARCH_SPACE_UNION_FIELD = UnionField(
46+
def get_choice_and_single_value_schema_of_type(cls, **kwargs):
47+
return UnionField([cls(**kwargs), NestedField(get_choice_schema_of_type(cls, **kwargs))])
48+
49+
50+
FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField(
2151
[
22-
NestedField(ChoiceSchema()),
52+
fields.Float(),
53+
DumpableIntegerField(strict=True),
54+
NestedField(get_choice_schema_of_type(DumpableIntegerField, strict=True)),
55+
NestedField(get_choice_schema_of_type(fields.Float)),
2356
NestedField(UniformSchema()),
2457
NestedField(QUniformSchema()),
2558
NestedField(NormalSchema()),
@@ -28,54 +61,100 @@
2861
]
2962
)
3063

64+
INT_SEARCH_SPACE_DISTRIBUTION_FIELD = UnionField(
65+
[
66+
DumpableIntegerField(strict=True),
67+
NestedField(get_choice_schema_of_type(DumpableIntegerField, strict=True)),
68+
NestedField(RandintSchema()),
69+
]
70+
)
71+
72+
STRING_SEARCH_SPACE_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(DumpableStringField)
73+
BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(fields.Bool)
74+
75+
classification_model_name_args = {"allowed_values": [o.value for o in ImageClassificationModelNames]}
76+
detection_model_name_args = {"allowed_values": [o.value for o in ImageObjectDetectionModelNames]}
77+
segmentation_model_name_args = {"allowed_values": [o.value for o in ImageInstanceSegmentationModelNames]}
78+
model_size_enum_args = {"allowed_values": [o.value for o in ModelSize], "casing_transform": camel_to_snake}
79+
learning_rate_scheduler_enum_args = {
80+
"allowed_values": [o.value for o in LearningRateScheduler],
81+
"casing_transform": camel_to_snake,
82+
}
83+
optimizer_enum_args = {"allowed_values": [o.value for o in StochasticOptimizer], "casing_transform": camel_to_snake}
84+
validation_metric_enum_args = {
85+
"allowed_values": [o.value for o in ValidationMetricType],
86+
"casing_transform": camel_to_snake,
87+
}
88+
89+
CLASSIFICATION_MODEL_NAME_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(
90+
StringTransformedEnum, **classification_model_name_args
91+
)
92+
DETECTION_MODEL_NAME_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(
93+
StringTransformedEnum, **detection_model_name_args
94+
)
95+
SEGMENTATION_MODEL_NAME_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(
96+
StringTransformedEnum, **segmentation_model_name_args
97+
)
98+
MODEL_SIZE_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(
99+
StringTransformedEnum, **model_size_enum_args
100+
)
101+
LEARNING_RATE_SCHEDULER_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(
102+
StringTransformedEnum, **learning_rate_scheduler_enum_args
103+
)
104+
OPTIMIZER_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(StringTransformedEnum, **optimizer_enum_args)
105+
VALIDATION_METRIC_DISTRIBUTION_FIELD = get_choice_and_single_value_schema_of_type(
106+
StringTransformedEnum, **validation_metric_enum_args
107+
)
108+
31109

32110
class ImageModelDistributionSettingsSchema(metaclass=PatchedSchemaMeta):
33-
ams_gradient = SEARCH_SPACE_UNION_FIELD
34-
beta1 = SEARCH_SPACE_UNION_FIELD
35-
beta2 = SEARCH_SPACE_UNION_FIELD
36-
distributed = SEARCH_SPACE_UNION_FIELD
37-
early_stopping = SEARCH_SPACE_UNION_FIELD
38-
early_stopping_delay = SEARCH_SPACE_UNION_FIELD
39-
early_stopping_patience = SEARCH_SPACE_UNION_FIELD
40-
evaluation_frequency = SEARCH_SPACE_UNION_FIELD
41-
enable_onnx_normalization = SEARCH_SPACE_UNION_FIELD
42-
gradient_accumulation_step = SEARCH_SPACE_UNION_FIELD
43-
layers_to_freeze = SEARCH_SPACE_UNION_FIELD
44-
learning_rate = SEARCH_SPACE_UNION_FIELD
45-
learning_rate_scheduler = SEARCH_SPACE_UNION_FIELD
46-
model_name = SEARCH_SPACE_UNION_FIELD
47-
momentum = SEARCH_SPACE_UNION_FIELD
48-
nesterov = SEARCH_SPACE_UNION_FIELD
49-
number_of_epochs = SEARCH_SPACE_UNION_FIELD
50-
number_of_workers = SEARCH_SPACE_UNION_FIELD
51-
optimizer = SEARCH_SPACE_UNION_FIELD
52-
random_seed = SEARCH_SPACE_UNION_FIELD
53-
step_lr_gamma = SEARCH_SPACE_UNION_FIELD
54-
step_lr_step_size = SEARCH_SPACE_UNION_FIELD
55-
training_batch_size = SEARCH_SPACE_UNION_FIELD
56-
validation_batch_size = SEARCH_SPACE_UNION_FIELD
57-
warmup_cosine_lr_cycles = SEARCH_SPACE_UNION_FIELD
58-
warmup_cosine_lr_warmup_epochs = SEARCH_SPACE_UNION_FIELD
59-
weight_decay = SEARCH_SPACE_UNION_FIELD
111+
ams_gradient = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD
112+
augmentations = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD
113+
beta1 = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
114+
beta2 = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
115+
distributed = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD
116+
early_stopping = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD
117+
early_stopping_delay = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
118+
early_stopping_patience = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
119+
evaluation_frequency = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
120+
enable_onnx_normalization = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD
121+
gradient_accumulation_step = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
122+
layers_to_freeze = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
123+
learning_rate = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
124+
learning_rate_scheduler = LEARNING_RATE_SCHEDULER_DISTRIBUTION_FIELD
125+
momentum = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
126+
nesterov = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD
127+
number_of_epochs = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
128+
number_of_workers = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
129+
optimizer = OPTIMIZER_DISTRIBUTION_FIELD
130+
random_seed = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
131+
step_lr_gamma = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
132+
step_lr_step_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
133+
training_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
134+
validation_batch_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
135+
warmup_cosine_lr_cycles = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
136+
warmup_cosine_lr_warmup_epochs = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
137+
weight_decay = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
60138

61139

62140
class ImageModelDistributionSettingsClassificationSchema(ImageModelDistributionSettingsSchema):
63-
training_crop_size = SEARCH_SPACE_UNION_FIELD
64-
validation_crop_size = SEARCH_SPACE_UNION_FIELD
65-
validation_resize_size = SEARCH_SPACE_UNION_FIELD
66-
weighted_loss = SEARCH_SPACE_UNION_FIELD
141+
model_name = CLASSIFICATION_MODEL_NAME_DISTRIBUTION_FIELD
142+
training_crop_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
143+
validation_crop_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
144+
validation_resize_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
145+
weighted_loss = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
67146

68147
@post_dump
69148
def conversion(self, data, **kwargs):
70149
if self.context.get("inside_pipeline", False):
71150
# An AutoML job inside a pipeline does load(dump) instead of calling to_rest_object explicitly to create the AutoRest object from the SDK job.
72151
# Hence, for a pipeline job, we explicitly convert the sweep distribution dict to str after dump in this method.
73152
# For standalone automl job, same conversion happens in image_classification_job._to_rest_object()
74-
from azure.ai.ml.entities._job.automl.image.image_search_space_utils import _convert_sweep_dist_dict_to_str
153+
from azure.ai.ml.entities._job.automl.image.image_search_space_utils import (
154+
_convert_sweep_dist_dict_to_str_dict,
155+
)
75156

76-
for k, sweep_dist_dict in data.items():
77-
if sweep_dist_dict is not None:
78-
data[k] = _convert_sweep_dist_dict_to_str(sweep_dist_dict)
157+
data = _convert_sweep_dist_dict_to_str_dict(data)
79158
return data
80159

81160
@pre_load
@@ -84,9 +163,7 @@ def before_make(self, data, **kwargs):
84163
from azure.ai.ml.entities._job.automl.image.image_search_space_utils import _convert_sweep_dist_str_to_dict
85164

86165
# Convert each sweep distribution str to a sweep distribution dict to comply with the search_space schema.
87-
for k, val in data.items():
88-
if isinstance(val, str):
89-
data[k] = _convert_sweep_dist_str_to_dict(val)
166+
data = _convert_sweep_dist_str_to_dict(data)
90167
return data
91168

92169
@post_load
@@ -96,20 +173,20 @@ def make(self, data, **kwargs):
96173
return ImageClassificationSearchSpace(**data)
97174

98175

99-
class ImageModelDistributionSettingsObjectDetectionSchema(ImageModelDistributionSettingsSchema):
100-
box_detections_per_image = SEARCH_SPACE_UNION_FIELD
101-
box_score_threshold = SEARCH_SPACE_UNION_FIELD
102-
image_size = SEARCH_SPACE_UNION_FIELD
103-
max_size = SEARCH_SPACE_UNION_FIELD
104-
min_size = SEARCH_SPACE_UNION_FIELD
105-
model_size = SEARCH_SPACE_UNION_FIELD
106-
multi_scale = SEARCH_SPACE_UNION_FIELD
107-
nms_iou_threshold = SEARCH_SPACE_UNION_FIELD
108-
tile_grid_size = SEARCH_SPACE_UNION_FIELD
109-
tile_overlap_ratio = SEARCH_SPACE_UNION_FIELD
110-
tile_predictions_nms_threshold = SEARCH_SPACE_UNION_FIELD
111-
validation_iou_threshold = SEARCH_SPACE_UNION_FIELD
112-
validation_metric_type = SEARCH_SPACE_UNION_FIELD
176+
class ImageModelDistributionSettingsDetectionCommonSchema(ImageModelDistributionSettingsSchema):
177+
box_detections_per_image = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
178+
box_score_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
179+
image_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
180+
max_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
181+
min_size = INT_SEARCH_SPACE_DISTRIBUTION_FIELD
182+
model_size = MODEL_SIZE_DISTRIBUTION_FIELD
183+
multi_scale = BOOL_SEARCH_SPACE_DISTRIBUTION_FIELD
184+
nms_iou_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
185+
tile_grid_size = STRING_SEARCH_SPACE_DISTRIBUTION_FIELD
186+
tile_overlap_ratio = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
187+
tile_predictions_nms_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
188+
validation_iou_threshold = FLOAT_SEARCH_SPACE_DISTRIBUTION_FIELD
189+
validation_metric_type = VALIDATION_METRIC_DISTRIBUTION_FIELD
113190

114191
@post_dump
115192
def conversion(self, data, **kwargs):
@@ -118,11 +195,11 @@ def conversion(self, data, **kwargs):
118195
# from sdk job object.
119196
# Hence, for a pipeline job, we explicitly convert the sweep distribution dict to str after dump in this method.
120197
# For standalone automl job, same conversion happens in image_object_detection_job._to_rest_object()
121-
from azure.ai.ml.entities._job.automl.image.image_search_space_utils import _convert_sweep_dist_dict_to_str
198+
from azure.ai.ml.entities._job.automl.image.image_search_space_utils import (
199+
_convert_sweep_dist_dict_to_str_dict,
200+
)
122201

123-
for k, sweep_dist_dict in data.items():
124-
if sweep_dist_dict is not None:
125-
data[k] = _convert_sweep_dist_dict_to_str(sweep_dist_dict)
202+
data = _convert_sweep_dist_dict_to_str_dict(data)
126203
return data
127204

128205
@pre_load
@@ -131,13 +208,19 @@ def before_make(self, data, **kwargs):
131208
from azure.ai.ml.entities._job.automl.image.image_search_space_utils import _convert_sweep_dist_str_to_dict
132209

133210
# Convert each sweep distribution str to a sweep distribution dict to comply with the search_space schema.
134-
for k, val in data.items():
135-
if isinstance(val, str):
136-
data[k] = _convert_sweep_dist_str_to_dict(val)
211+
data = _convert_sweep_dist_str_to_dict(data)
137212
return data
138213

139214
@post_load
140215
def make(self, data, **kwargs):
141216
from azure.ai.ml.automl import ImageObjectDetectionSearchSpace
142217

143218
return ImageObjectDetectionSearchSpace(**data)
219+
220+
221+
class ImageModelDistributionSettingsObjectDetectionSchema(ImageModelDistributionSettingsDetectionCommonSchema):
222+
model_name = DETECTION_MODEL_NAME_DISTRIBUTION_FIELD
223+
224+
225+
class ImageModelDistributionSettingsInstanceSegmentationSchema(ImageModelDistributionSettingsObjectDetectionSchema):
226+
model_name = SEGMENTATION_MODEL_NAME_DISTRIBUTION_FIELD

sdk/ml/azure-ai-ml/azure/ai/ml/_schema/automl/image_vertical/image_object_detection.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
TaskType,
1515
)
1616
from azure.ai.ml._schema.automl.image_vertical.image_model_distribution_settings import (
17+
ImageModelDistributionSettingsInstanceSegmentationSchema,
1718
ImageModelDistributionSettingsObjectDetectionSchema,
1819
)
1920
from azure.ai.ml._schema.automl.image_vertical.image_model_settings import (
@@ -26,11 +27,7 @@
2627
from azure.ai.ml.constants import AutoMLConstants
2728

2829

29-
class ImageObjectDetectionBaseSchema(ImageVerticalSchema):
30-
search_space = fields.List(NestedField(ImageModelDistributionSettingsObjectDetectionSchema()))
31-
32-
33-
class ImageObjectDetectionSchema(ImageObjectDetectionBaseSchema):
30+
class ImageObjectDetectionSchema(ImageVerticalSchema):
3431
task_type = StringTransformedEnum(
3532
allowed_values=TaskType.IMAGE_OBJECT_DETECTION,
3633
casing_transform=camel_to_snake,
@@ -43,14 +40,15 @@ class ImageObjectDetectionSchema(ImageObjectDetectionBaseSchema):
4340
load_default=camel_to_snake(ObjectDetectionPrimaryMetrics.MEAN_AVERAGE_PRECISION),
4441
)
4542
training_parameters = NestedField(ImageModelSettingsObjectDetectionSchema())
43+
search_space = fields.List(NestedField(ImageModelDistributionSettingsObjectDetectionSchema()))
4644

4745
@post_load
4846
def make(self, data, **kwargs) -> Dict[str, Any]:
4947
data.pop("task_type")
5048
return data
5149

5250

53-
class ImageInstanceSegmentationSchema(ImageObjectDetectionBaseSchema):
51+
class ImageInstanceSegmentationSchema(ImageVerticalSchema):
5452
task_type = StringTransformedEnum(
5553
allowed_values=TaskType.IMAGE_INSTANCE_SEGMENTATION,
5654
casing_transform=camel_to_snake,
@@ -63,6 +61,7 @@ class ImageInstanceSegmentationSchema(ImageObjectDetectionBaseSchema):
6361
load_default=camel_to_snake(InstanceSegmentationPrimaryMetrics.MEAN_AVERAGE_PRECISION),
6462
)
6563
training_parameters = NestedField(ImageModelSettingsInstanceSegmentationSchema())
64+
search_space = fields.List(NestedField(ImageModelDistributionSettingsInstanceSegmentationSchema()))
6665

6766
@post_load
6867
def make(self, data, **kwargs) -> Dict[str, Any]:

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/automl/image/image_classification_job.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ def _load_from_dict(
172172
from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMulticlassNodeSchema
173173

174174
if inside_pipeline:
175+
if context.get("inside_pipeline", None) is None:
176+
context["inside_pipeline"] = True
175177
loaded_data = load_from_dict(
176178
ImageClassificationMulticlassNodeSchema,
177179
data,

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/automl/image/image_classification_multilabel_job.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ def _load_from_dict(
174174
from azure.ai.ml._schema.pipeline.automl_node import ImageClassificationMultilabelNodeSchema
175175

176176
if inside_pipeline:
177+
if context.get("inside_pipeline", None) is None:
178+
context["inside_pipeline"] = True
177179
loaded_data = load_from_dict(
178180
ImageClassificationMultilabelNodeSchema,
179181
data,

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/automl/image/image_instance_segmentation_job.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ def _load_from_dict(
172172
from azure.ai.ml._schema.pipeline.automl_node import ImageInstanceSegmentationNodeSchema
173173

174174
if inside_pipeline:
175+
if context.get("inside_pipeline", None) is None:
176+
context["inside_pipeline"] = True
175177
loaded_data = load_from_dict(
176178
ImageInstanceSegmentationNodeSchema,
177179
data,

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/automl/image/image_object_detection_job.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ def _load_from_dict(
171171
from azure.ai.ml._schema.pipeline.automl_node import ImageObjectDetectionNodeSchema
172172

173173
if inside_pipeline:
174+
if context.get("inside_pipeline", None) is None:
175+
context["inside_pipeline"] = True
174176
loaded_data = load_from_dict(
175177
ImageObjectDetectionNodeSchema,
176178
data,

0 commit comments

Comments
 (0)