
Commit 20214ab

maxxgx and Copilot authored
Apply filtering parameters (#1046)
Co-authored-by: Copilot <[email protected]>
1 parent 58e1934 commit 20214ab

File tree

21 files changed, +283 -139 lines


interactive_ai/services/director/app/service/job_submission/job_creation_helpers.py

Lines changed: 61 additions & 8 deletions
@@ -14,6 +14,7 @@
 
 from communication.exceptions import JobCreationFailedException
 from features.feature_flag import FeatureFlag
+from service.configuration_service import ConfigurationService
 
 from geti_types import ID, ProjectIdentifier
 from iai_core.entities.dataset_storage import DatasetStorage
@@ -51,6 +52,56 @@ class JobName(str, Enum):
     MODEL_TEST = "Model testing"
 
 
+@dataclass
+class FilteringParameters:
+    """
+    Helper class to hold filtering parameters for optimize/test jobs.
+    """
+
+    min_annotation_size: int | None
+    max_annotation_size: int | None
+    min_number_of_annotations: int | None
+    max_number_of_annotations: int | None
+
+
+def _get_filtering_parameters(project: Project, model: Model) -> FilteringParameters:
+    """
+    Returns filtering parameters for training jobs based on the project and model.
+
+    :param project: Project containing the task node
+    :param model: Model to be trained
+    :return: FilteringParameters instance with the filtering parameters
+    """
+    training_config = ConfigurationService.get_full_training_configuration(
+        project_identifier=project.identifier,
+        task_id=model.model_storage.task_node_id,
+        model_manifest_id=model.model_storage.model_manifest_id,
+    )
+    filtering_params = training_config.global_parameters.dataset_preparation.filtering
+    return FilteringParameters(
+        min_annotation_size=(
+            filtering_params.min_annotation_pixels.min_annotation_pixels
+            if filtering_params.min_annotation_pixels and filtering_params.min_annotation_pixels.enable
+            else None
+        ),
+        max_annotation_size=(
+            filtering_params.max_annotation_pixels.max_annotation_pixels
+            if filtering_params.max_annotation_pixels and filtering_params.max_annotation_pixels.enable
+            else None
+        ),
+        min_number_of_annotations=(
+            filtering_params.min_annotation_objects.min_annotation_objects
+            if filtering_params.min_annotation_objects and filtering_params.min_annotation_objects.enable
+            else None
+        ),
+        max_number_of_annotations=(
+            filtering_params.max_annotation_objects.max_annotation_objects
+            if filtering_params.max_annotation_objects and filtering_params.max_annotation_objects.enable
+            else None
+        ),
+    )
+
+
 def get_model_storage_for_task(
     project_identifier: ProjectIdentifier,
     task_node_id: ID,
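
The new helper applies one rule four times: a filter group contributes its configured value only when the group is present and its `enable` flag is set; otherwise the corresponding payload field is None, which downstream tasks treat as "filter disabled". A minimal, self-contained sketch of that rule, using illustrative stand-in config classes rather than the real Geti configuration models:

from dataclasses import dataclass


@dataclass
class MaxAnnotationObjects:
    # Stand-in for one filter group of the training configuration.
    enable: bool = False
    max_annotation_objects: int = 10000


@dataclass
class Filtering:
    # A group may be absent (None) or present but disabled.
    max_annotation_objects: MaxAnnotationObjects | None = None


def resolve_max_annotations(filtering: Filtering) -> int | None:
    # Same conditional shape as _get_filtering_parameters above.
    group = filtering.max_annotation_objects
    return group.max_annotation_objects if group and group.enable else None


assert resolve_max_annotations(Filtering()) is None  # absent group
assert resolve_max_annotations(Filtering(MaxAnnotationObjects(enable=False))) is None  # disabled group
assert resolve_max_annotations(Filtering(MaxAnnotationObjects(enable=True))) == 10000  # enabled group

An absent group and a disabled one thus behave identically in the job payload, which keeps the payload schema stable even when parts of the configuration are missing.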
@@ -239,11 +290,14 @@ def create_payload(
 
         :returns: a dict representing the job payload
         """
+        filter_parameters = _get_filtering_parameters(project=self.project, model=self.model)
         return {
             "project_id": self.project.id_,
             "model_test_result_id": self.model_test_result.id_,
-            "min_annotation_size": self.min_annotation_size,
-            "max_number_of_annotations": self.max_number_of_annotations,
+            "min_annotation_size": filter_parameters.min_annotation_size,
+            "max_annotation_size": filter_parameters.max_annotation_size,
+            "min_number_of_annotations": filter_parameters.min_number_of_annotations,
+            "max_number_of_annotations": filter_parameters.max_number_of_annotations,
         }
 
     def create_metadata(self) -> dict:
@@ -285,9 +339,6 @@ class OptimizationJobData:
     training_dataset_storage: DatasetStorage
     model: Model
    optimization_type: ModelOptimizationType
-    optimization_parameters: dict
-    min_annotation_size: int | None = None
-    max_number_of_annotations: int | None = None
 
     @property
     def job_type(self) -> str:
@@ -308,7 +359,6 @@ def create_key(self) -> str:
             "dataset_storage_id": self.training_dataset_storage.id_,
             "model_storage_id": self.model.model_storage.id_,
             "type": self.job_type,
-            "optimization_parameters": self.optimization_parameters,
         }
         return _serialize_job_key(job_key)
 
@@ -318,6 +368,7 @@ def create_payload(self) -> dict:
 
         :return: dict with the job's metadata
         """
+        filter_parameters = _get_filtering_parameters(project=self.project, model=self.model)
         return {
             "project_id": self.project.id_,
             "dataset_storage_id": self.training_dataset_storage.id_,
@@ -327,8 +378,10 @@ def create_payload(self) -> dict:
             # However, leaving this flag itself can make it easier to debug the future problem (e.g., CVS-142877).
             # You can still launch the job with `enable_optimize_from_dataset_shard=False` from Flyte console.
             "enable_optimize_from_dataset_shard": True,
-            "min_annotation_size": self.min_annotation_size,
-            "max_number_of_annotations": self.max_number_of_annotations,
+            "min_annotation_size": filter_parameters.min_annotation_size,
+            "max_annotation_size": filter_parameters.max_annotation_size,
+            "min_number_of_annotations": filter_parameters.min_number_of_annotations,
+            "max_number_of_annotations": filter_parameters.max_number_of_annotations,
         }
 
     def create_metadata(self) -> dict:
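
Note the knock-on effect of the create_key hunk above: with "optimization_parameters" removed from the key, two submissions that optimize the same model now serialize to identical keys, so the duplicate policy treats them as the same job regardless of POT settings. A small sketch of the mechanism, assuming _serialize_job_key is essentially sorted-key JSON (serialize_job_key below is a local stand-in; the sort order matches the key string asserted in the integration test further down):

import json


def serialize_job_key(job_key: dict) -> str:
    # Sorting keys yields a stable string, so equal dicts always compare
    # equal under the duplicate policy regardless of insertion order.
    return json.dumps(job_key, sort_keys=True)


key = serialize_job_key(
    {
        "workspace_id": "ws-1",
        "project_id": "proj-1",
        "dataset_storage_id": "ds-1",
        "model_storage_id": "ms-1",
        "type": "optimize_pot",
    }
)
print(key)
# {"dataset_storage_id": "ds-1", "model_storage_id": "ms-1", "project_id": "proj-1", "type": "optimize_pot", "workspace_id": "ws-1"}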
Lines changed: 0 additions & 42 deletions
@@ -1,20 +1,13 @@
 # Copyright (C) 2022-2025 Intel Corporation
 # LIMITED EDGE SOFTWARE DISTRIBUTION LICENSE
 
-from typing import TYPE_CHECKING, cast
 
 from service.job_submission.base import JobParams, ModelJobSubmitter
 from service.job_submission.job_creation_helpers import OPTIMIZE_JOB_PRIORITY, JobDuplicatePolicy, OptimizationJobData
 
 from geti_types import ID
-from iai_core.configuration.elements.component_parameters import ComponentType
-from iai_core.configuration.elements.dataset_manager_parameters import DatasetManagementConfig
 from iai_core.entities.model import Model, ModelOptimizationType
 from iai_core.entities.project import Project
-from iai_core.repos import ConfigurableParametersRepo
-
-if TYPE_CHECKING:
-    from iai_core.configuration.elements.optimization_parameters import POTOptimizationParameters
 
 
 class ModelOptimizationJobSubmitter(ModelJobSubmitter):
@@ -34,32 +27,12 @@ def prepare_data(  # type: ignore
         :param author: ID of the user submitting the job
         :return: ID of the optimization job that has been submitted.
         """
-        optimization_params = self._get_model_optimization_parameters(model=model)
-        config_repo = ConfigurableParametersRepo(project.identifier)
-        dataset_manager_config = config_repo.get_or_create_component_parameters(
-            data_instance_of=DatasetManagementConfig,
-            component=ComponentType.PIPELINE_DATASET_MANAGER,
-        )
-        min_annotation_size = (
-            None
-            if dataset_manager_config.minimum_annotation_size == -1
-            else dataset_manager_config.minimum_annotation_size
-        )
-        max_number_of_annotations = (
-            None
-            if dataset_manager_config.maximum_number_of_annotations == -1
-            else dataset_manager_config.maximum_number_of_annotations
-        )
-
         optimization_job_data = OptimizationJobData(
             workspace_id=project.workspace_id,
             project=project,
             training_dataset_storage=project.get_training_dataset_storage(),
             model=model,
             optimization_type=optimization_type,
-            optimization_parameters=optimization_params,
-            min_annotation_size=min_annotation_size,
-            max_number_of_annotations=max_number_of_annotations,
         )
         return JobParams(
             priority=OPTIMIZE_JOB_PRIORITY,
@@ -74,18 +47,3 @@ def prepare_data(  # type: ignore
             gpu_num_required=optimization_job_data.gpu_num_required,
             cancellable=True,
         )
-
-    @staticmethod
-    def _get_model_optimization_parameters(model: Model) -> dict:
-        """
-        Return the model optimization parameters as a dict.
-
-        :param model: the model to extract the optimization parameters from
-        :returns: the optimization parameters for the model
-        """
-        pot_parameters = cast(
-            "POTOptimizationParameters",
-            model.configuration.configurable_parameters.pot_parameters,  # type: ignore[attr-defined]
-        )
-
-        return {"stat_subset_size": pot_parameters.stat_subset_size}

interactive_ai/services/director/app/service/job_submission/test.py

Lines changed: 0 additions & 20 deletions
@@ -4,11 +4,8 @@
 from service.job_submission.job_creation_helpers import MODEL_TEST_JOB_PRIORITY, JobDuplicatePolicy, ModelTestJobData
 
 from geti_types import ID
-from iai_core.configuration.elements.component_parameters import ComponentType
-from iai_core.configuration.elements.dataset_manager_parameters import DatasetManagementConfig
 from iai_core.entities.model_test_result import ModelTestResult
 from iai_core.entities.project import Project
-from iai_core.repos import ConfigurableParametersRepo
 
 
 class ModelTestingJobSubmitter(ModelJobSubmitter):
@@ -24,21 +21,6 @@ def prepare_data(  # type: ignore
         :return: ID of the model test job that has been submitted to the jobs client
         """
         model = model_test_result.get_model()
-        config_repo = ConfigurableParametersRepo(project.identifier)
-        dataset_manager_config = config_repo.get_or_create_component_parameters(
-            data_instance_of=DatasetManagementConfig,
-            component=ComponentType.PIPELINE_DATASET_MANAGER,
-        )
-        min_annotation_size = (
-            None
-            if dataset_manager_config.minimum_annotation_size == -1
-            else dataset_manager_config.minimum_annotation_size
-        )
-        max_number_of_annotations = (
-            None
-            if dataset_manager_config.maximum_number_of_annotations == -1
-            else dataset_manager_config.maximum_number_of_annotations
-        )
         model_testing_job_data = ModelTestJobData(
             model_test_result=model_test_result,
             model=model,
@@ -49,8 +31,6 @@ def prepare_data(  # type: ignore
             task_node=next(
                 task for task in project.get_trainable_task_nodes() if task.id_ == model.model_storage.task_node_id
             ),
-            min_annotation_size=min_annotation_size,
-            max_number_of_annotations=max_number_of_annotations,
         )
 
         return JobParams(

interactive_ai/services/director/tests/integration/communication/test_integration_optimization_controller.py

Lines changed: 28 additions & 5 deletions
@@ -6,6 +6,7 @@
 
 from communication.controllers.optimization_controller import OptimizationController
 from communication.exceptions import NotReadyForOptimizationException
+from service.configuration_service import ConfigurationService
 
 from geti_types import ID
 from grpc_interfaces.job_submission.client import GRPCJobsClient
@@ -16,7 +17,14 @@
 
 
 class TestOptimizationController:
-    def test_start_pot_job(self, fxt_db_project_service, fxt_pot_hyperparameters, fxt_mock_jobs_client) -> None:
+    def test_start_pot_job(
+        self,
+        fxt_db_project_service,
+        fxt_pot_hyperparameters,
+        fxt_mock_jobs_client,
+        fxt_training_configuration_task_level,
+        fxt_global_parameters,
+    ) -> None:
         """
         <b>Description:</b>
         Test POT optimization job creation
@@ -50,8 +58,17 @@ def test_start_pot_job(self, fxt_db_project_service, fxt_pot_hyperparameters, fx
         model.model_storage.model_template.entrypoints = EntryPoints(base="Base interface", nncf="NNCF interface")
         model_repo = ModelRepo(model.model_storage_identifier)
         model_repo.save(model)
+        filtering_params = fxt_global_parameters.dataset_preparation.filtering
+
         # Act
-        with patch.object(GRPCJobsClient, "submit", return_value=None):
+        with (
+            patch.object(GRPCJobsClient, "submit", return_value=None),
+            patch.object(
+                ConfigurationService,
+                "get_full_training_configuration",
+                return_value=fxt_training_configuration_task_level,
+            ) as mock_get_full_training_configuration,
+        ):
             result = OptimizationController().start_optimization(
                 project_id=project.id_,
                 model_storage_id=model.model_storage.id_,
@@ -60,13 +77,17 @@ def test_start_pot_job(self, fxt_db_project_service, fxt_pot_hyperparameters, fx
             )
 
         # Assert
+        mock_get_full_training_configuration.assert_called_once_with(
+            project_identifier=project.identifier,
+            task_id=model.model_storage.task_node_id,
+            model_manifest_id=model.model_storage.model_manifest_id,
+        )
         fxt_mock_jobs_client._jobs_client.submit.assert_called_once_with(
             priority=1,
             job_name="Optimization",
             job_type="optimize_pot",
             key=f'{{"dataset_storage_id": "{dataset_storage.id_}",'
             f' "model_storage_id": "{model.model_storage.id_}",'
-            f' "optimization_parameters": {{"stat_subset_size": 300}},'
             f' "project_id": "{project.id_}",'
             f' "type": "optimize_pot",'
             f' "workspace_id": "{project.workspace_id}"}}',
@@ -76,8 +97,10 @@ def test_start_pot_job(self, fxt_db_project_service, fxt_pot_hyperparameters, fx
                 "model_storage_id": model.model_storage.id_,
                 "project_id": project.id_,
                 "enable_optimize_from_dataset_shard": True,
-                "max_number_of_annotations": None,
-                "min_annotation_size": None,
+                "max_number_of_annotations": filtering_params.max_annotation_objects.max_annotation_objects,
+                "min_number_of_annotations": filtering_params.min_annotation_objects.min_annotation_objects,
+                "min_annotation_size": filtering_params.min_annotation_pixels.min_annotation_pixels,
+                "max_annotation_size": filtering_params.max_annotation_pixels.max_annotation_pixels,
             },
             metadata={
                "base_model_id": model.id_,
