Skip to content

Commit 9c96c79

Browse files
authored
[CDF-27101] 🐢Build v2 module validation (#2790)
# Description

<img width="873" height="340" alt="image" src="https://github.com/user-attachments/assets/944afd7c-2667-40d4-bca4-5037c2764f70" />

## Bump

- [ ] Patch
- [x] Skip
1 parent e13a13f commit 9c96c79

File tree

9 files changed

+820
-1191
lines changed

9 files changed

+820
-1191
lines changed
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
import difflib
2+
from collections import defaultdict
3+
from collections.abc import Iterable, Sequence
4+
from pathlib import Path
5+
from typing import Any, cast
6+
7+
from cognite_toolkit._cdf_tk.commands.build_v2.data_classes import (
8+
BuildSourceFiles,
9+
ModelSyntaxWarning,
10+
ModuleSource,
11+
RelativeDirPath,
12+
RelativeFilePath,
13+
)
14+
from cognite_toolkit._cdf_tk.commands.build_v2.data_classes._module import (
15+
AmbiguousSelection,
16+
BuildSource,
17+
BuildVariable,
18+
InvalidBuildVariable,
19+
MisplacedModule,
20+
NonExistingModuleName,
21+
)
22+
from cognite_toolkit._cdf_tk.constants import EXCL_FILES, MODULES
23+
from cognite_toolkit._cdf_tk.cruds import CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA, ResourceTypes
24+
25+
26+
class ModuleParser:
27+
VARIABLE_ERROR_CODE = "CONFIG_VARIABLE_001"
28+
29+
@classmethod
30+
def parse(cls, build: BuildSourceFiles) -> BuildSource:
31+
source_by_module_id, orphan_yaml_files = cls._find_modules(build.yaml_files, build.organization_dir)
32+
33+
module_ids = list(source_by_module_id.keys())
34+
available_paths = cls._expand_parents(module_ids)
35+
selected_modules = cls._select_modules(module_ids, build.selected_modules)
36+
selected_paths = cls._expand_parents(selected_modules)
37+
38+
module_paths_by_name: dict[str, list[RelativeDirPath]] = defaultdict(list)
39+
for module_path in module_ids:
40+
module_paths_by_name[module_path.name].append(module_path)
41+
42+
build_variables, invalid_variables = cls._parse_variables(build.variables, available_paths, selected_paths)
43+
44+
module_sources: list[ModuleSource] = []
45+
for module in selected_modules:
46+
source = source_by_module_id[module]
47+
module_specific_variables: dict[int | None, list[BuildVariable]] = defaultdict(list)
48+
for path in [module, *module.parents]:
49+
if path_variables := build_variables.get(path):
50+
for iteration, variables in path_variables.items():
51+
module_specific_variables[iteration].extend(variables)
52+
53+
if module_specific_variables:
54+
for iteration, variables in module_specific_variables.items():
55+
module_sources.append(
56+
source.model_copy(update={"variables": variables, "iteration": iteration or 0})
57+
)
58+
else:
59+
module_sources.append(source)
60+
61+
return BuildSource(
62+
module_dir=build.module_dir,
63+
modules=module_sources,
64+
invalid_variables=invalid_variables,
65+
non_existing_module_names=cls._get_non_existing_module_names(
66+
{name for name in build.selected_modules if isinstance(name, str)}, set(module_paths_by_name.keys())
67+
),
68+
misplaced_modules=cls._get_misplaced_modules(set(module_ids)),
69+
ambiguous_selection=cls._get_ambiguous_selection(module_paths_by_name, build.selected_modules),
70+
orphan_yaml_files=orphan_yaml_files,
71+
)
72+
73+
@classmethod
74+
def _find_modules(
75+
cls, yaml_files: list[RelativeFilePath], organization_dir: Path
76+
) -> tuple[dict[RelativeDirPath, ModuleSource], list[RelativeDirPath]]:
77+
"""Organizes YAML files by their module (top-level folder in the modules directory)."""
78+
source_by_module_id: dict[RelativeDirPath, ModuleSource] = {}
79+
orphan_files: list[RelativeDirPath] = []
80+
for yaml_file in yaml_files:
81+
if yaml_file.name in EXCL_FILES:
82+
continue
83+
relative_module_path, resource_folder = cls._get_module_path_from_resource_file_path(yaml_file)
84+
if relative_module_path and resource_folder:
85+
if relative_module_path not in source_by_module_id:
86+
source_by_module_id[relative_module_path] = ModuleSource(
87+
path=organization_dir / relative_module_path,
88+
id=relative_module_path,
89+
)
90+
source = source_by_module_id[relative_module_path]
91+
if resource_folder not in source.resource_files_by_folder:
92+
source.resource_files_by_folder[resource_folder] = []
93+
source.resource_files_by_folder[resource_folder].append(organization_dir / yaml_file)
94+
else:
95+
orphan_files.append(yaml_file)
96+
return source_by_module_id, orphan_files
97+
98+
@staticmethod
99+
def _get_module_path_from_resource_file_path(resource_file: Path) -> tuple[Path | None, ResourceTypes | None]:
100+
for parent in resource_file.parents:
101+
if parent.name in CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA:
102+
# We know that all keys in CRUDS_BY_FOLDER_NAME_INCLUDE_ALPHA are valid ResourceTypes,
103+
# so this cast is safe.
104+
return parent.parent, cast(ResourceTypes, parent.name)
105+
return None, None
106+
107+
@classmethod
108+
def _expand_parents(cls, module_ids: Sequence[Path]) -> set[Path]:
109+
return {Path("")} | set(module_ids) | {parent for module in module_ids for parent in module.parents}
110+
111+
@classmethod
112+
def _select_modules(
113+
cls, module_paths: Iterable[RelativeDirPath], selection: set[RelativeDirPath | str]
114+
) -> list[RelativeDirPath]:
115+
return [
116+
module_path
117+
for module_path in module_paths
118+
if module_path in selection
119+
or module_path.name in selection
120+
or any(parent in selection for parent in module_path.parents)
121+
]
122+
123+
@classmethod
124+
def _get_non_existing_module_names(
125+
cls, selected_module_names: set[str], available_names: set[str]
126+
) -> list[NonExistingModuleName]:
127+
non_existing: list[NonExistingModuleName] = []
128+
for name in sorted(selected_module_names - available_names):
129+
closest_matches = list(difflib.get_close_matches(name, available_names))
130+
non_existing.append(NonExistingModuleName(name=name, closest_matches=closest_matches))
131+
return non_existing
132+
133+
@classmethod
134+
def _get_misplaced_modules(cls, module_ids: set[RelativeDirPath]) -> list[MisplacedModule]:
135+
misplaced_modules: list[MisplacedModule] = []
136+
for module_path in sorted(module_ids):
137+
module_parents = set(module_path.parents)
138+
if parent_modules := (module_ids & module_parents):
139+
misplaced_modules.append(MisplacedModule(id=module_path, parent_modules=sorted(parent_modules)))
140+
return misplaced_modules
141+
142+
@classmethod
143+
def _get_ambiguous_selection(
144+
cls, module_paths_by_name: dict[str, list[RelativeDirPath]], selected_modules: set[str | RelativeDirPath]
145+
) -> list[AmbiguousSelection]:
146+
return [
147+
AmbiguousSelection(
148+
name=name,
149+
module_paths=module_paths,
150+
is_selected=name in selected_modules,
151+
)
152+
for name, module_paths in module_paths_by_name.items()
153+
if len(module_paths) > 1
154+
]
155+
156+
@classmethod
157+
def _parse_variables(
158+
cls, variables: dict[str, Any], available_paths: set[RelativeDirPath], selected_paths: set[RelativeDirPath]
159+
) -> tuple[dict[RelativeDirPath, dict[int | None, list[BuildVariable]]], list[InvalidBuildVariable]]:
160+
variables_by_path_and_iteration: dict[RelativeDirPath, dict[int | None, list[BuildVariable]]] = defaultdict(
161+
lambda: defaultdict(list)
162+
)
163+
invalid_variables: list[InvalidBuildVariable] = []
164+
to_check: list[tuple[RelativeDirPath, int | None, dict[str, Any]]] = [(Path(""), None, variables)]
165+
while to_check:
166+
path, iteration, subdict = to_check.pop()
167+
for key, value in subdict.items():
168+
subpath = path / key
169+
if isinstance(value, str | float | int | bool):
170+
variables_by_path_and_iteration[path][iteration].append(
171+
BuildVariable(id=subpath, value=value, is_selected=path in selected_paths, iteration=iteration)
172+
)
173+
elif isinstance(value, dict):
174+
if subpath in available_paths:
175+
to_check.append((subpath, iteration, value))
176+
else:
177+
invalid_variables.append(
178+
InvalidBuildVariable(
179+
id=subpath,
180+
value=str(value),
181+
is_selected=path in selected_paths,
182+
iteration=iteration,
183+
error=ModelSyntaxWarning(
184+
code=cls.VARIABLE_ERROR_CODE,
185+
message=f"Invalid variable path: {'.'.join(subpath.parts)}. This does not correspond to the "
186+
f"folder structure inside the {MODULES} directory.",
187+
fix="Ensure that the variable paths correspond to the folder structure inside the modules directory.",
188+
),
189+
)
190+
)
191+
elif isinstance(value, list):
192+
if all(isinstance(item, str | float | int | bool) for item in value):
193+
variables_by_path_and_iteration[path][iteration].append(
194+
BuildVariable(
195+
id=subpath, value=value, is_selected=path in selected_paths, iteration=iteration
196+
)
197+
)
198+
elif all(isinstance(item, dict) for item in value):
199+
for idx, item in enumerate(value, start=1):
200+
to_check.append((subpath, idx, item))
201+
else:
202+
invalid_variables.append(
203+
InvalidBuildVariable(
204+
id=subpath,
205+
value=str(value),
206+
is_selected=path in selected_paths,
207+
iteration=iteration,
208+
error=ModelSyntaxWarning(
209+
code=cls.VARIABLE_ERROR_CODE,
210+
message=f"Invalid variable type in list for variable {'.'.join(subpath.parts)}.",
211+
fix="Ensure that all items in the list are of the same supported type either (str, int, float, bool) or dict.",
212+
),
213+
)
214+
)
215+
else:
216+
raise NotImplementedError(f"Unsupported variable type: {type(value)} for variable {subpath}")
217+
return variables_by_path_and_iteration, invalid_variables

0 commit comments

Comments
Ā (0)