Skip to content

Commit c7c8ae6

Browse files
Magsschcursoragent
andauthored
[THIS-1049] Add fix() method to fixable validators (#1573)
# Description Add `fix()` implementations to the existing `MissingRequiresConstraint`, `SuboptimalRequiresConstraint`, `RequiresConstraintCycle`, and `MissingReverseDirectRelationTargetIndex` validators, enabling them to produce `FixAction`s that can be applied by the autofix infrastructure from part 1. Note: This PR introduces a slight change in how recommendations are made for `RequiresConstraintCycle` as to better align with the fix method. This issue itself will not recommend removing "all involved suboptimal constraints" involved in a specific cycle, but rather select the single "most problematic" one to remove, since that is all that is required to fix the cycle itself. In practice this mostly makes **no** difference for the user if they are running fix, however in the very specific edge case that they disable `MissingRequiresConstraint` and `SuboptimalRequiresConstraint`, running the validation for `RequiresConstraintCycle` will now only recommend removing the "one" constraint to fix the cycle itself. I consider this to be a more appropriately scoped recommendation/fix logic for this validator, as opposed to the previous logic, where the recommendations for `SuboptimalRequiresConstraint` could somewhat "leak" into this one. Analysis/query logic shared across validators has been moved to cached properties on `ValidationResources`. ## Bump - [ ] Patch - [x] Skip --------- Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 54d531c commit c7c8ae6

File tree

19 files changed

+774
-170
lines changed

19 files changed

+774
-170
lines changed

cognite/neat/_data_model/_analysis.py

Lines changed: 134 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from cognite.neat._data_model.models.dms._constraints import RequiresConstraintDefinition
1515
from cognite.neat._data_model.models.dms._container import ContainerPropertyDefinition, ContainerRequest
1616
from cognite.neat._data_model.models.dms._data_types import DirectNodeRelation
17+
from cognite.neat._data_model.models.dms._indexes import BtreeIndex
1718
from cognite.neat._data_model.models.dms._limits import SchemaLimits
1819
from cognite.neat._data_model.models.dms._references import (
1920
ContainerDirectReference,
@@ -622,10 +623,10 @@ def implements_graph(self) -> nx.DiGraph:
622623
return graph
623624

624625
@cached_property
625-
def implements_cycles(self) -> list[list[ViewReference]]:
626+
def implements_cycles(self) -> list[tuple[ViewReference, ...]]:
626627
"""Find all cycles in the implements graph.
627628
Returns:
628-
List of lists, where each list contains the ordered Views involved in forming the implements cycle.
629+
List of tuples, where each tuple contains the ordered Views involved in forming the implements cycle.
629630
"""
630631

631632
return self.graph_cycles(self.implements_graph)
@@ -731,14 +732,23 @@ def _user_intentional_constraints(self) -> set[tuple[ContainerReference, Contain
731732
732733
A constraint is user-intentional if:
733734
1. The constraint identifier does NOT have '__auto' postfix
734-
2. Neither src nor dst is part of a cycle (cyclic constraints are errors)
735+
2. Neither src nor dst is part of a manually-created cycle
735736
736737
These constraints are preserved even if they're not in the optimal structure, because
737738
they may be used for data integrity purposes.
738-
We DON'T consider manual-created constraints as user-intended if they form part of a cycle,
739-
because that indicates a problem with the data model where we likely can provide a better solution.
739+
We DON'T consider manual-created constraints as user-intended if they form part of a cycle
740+
consisting entirely of manual constraints. We ignore these because it indicates a problem with
741+
the data model where we likely can provide a better solution. We don't consider cycles formed
742+
only by __auto constraints.
740743
"""
741-
containers_in_cycles = {container for cycle in self.requires_constraint_cycles for container in cycle}
744+
containers_in_cycles: set[ContainerReference] = set()
745+
for cycle in self.requires_constraint_cycles:
746+
if all(
747+
self.requires_constraint_graph.edges[cycle[i], cycle[(i + 1) % len(cycle)]].get("is_auto", False)
748+
for i in range(len(cycle))
749+
):
750+
continue
751+
containers_in_cycles.update(cycle)
742752

743753
return {
744754
(src, dst)
@@ -786,17 +796,57 @@ def forms_directed_path(nodes: set[_NodeT], graph: nx.DiGraph) -> bool:
786796
return False
787797

788798
@cached_property
789-
def requires_constraint_cycles(self) -> list[list[ContainerReference]]:
799+
def requires_constraint_cycles(self) -> list[tuple[ContainerReference, ...]]:
790800
"""Find all cycles in the requires constraint graph.
791801
Returns:
792-
List of lists, where each list contains the ordered containers involved in forming the requires cycle.
802+
List of tuples, where each tuple contains the ordered containers involved in forming the requires cycle.
793803
"""
794804
return self.graph_cycles(self.requires_constraint_graph)
795805

796806
@staticmethod
797-
def graph_cycles(graph: nx.DiGraph) -> list[list[T_Reference]]:
807+
def graph_cycles(graph: nx.DiGraph) -> list[tuple[T_Reference, ...]]:
798808
"""Returns cycles in the graph otherwise empty list"""
799-
return [candidate for candidate in nx.simple_cycles(graph) if len(candidate) > 1]
809+
return [tuple(candidate) for candidate in nx.simple_cycles(graph) if len(candidate) > 1]
810+
811+
def pick_cycle_constraint_to_remove(
812+
self, cycle: tuple[ContainerReference, ...]
813+
) -> tuple[ContainerReference, ContainerReference]:
814+
"""Pick the single best requires constraint to remove to break a cycle.
815+
816+
Selects a constraint edge from the cycle using a tiered preference:
817+
auto-generated constraints are preferred over user-defined ones, and
818+
edges that conflict with the direction considered optimal by the global optimizer
819+
are preferred over those that are only redundant and covered by other constraints.
820+
"""
821+
suboptimal_constraints: list[tuple[ContainerReference, ContainerReference]] = []
822+
for i, source_container_ref in enumerate(cycle):
823+
required_container_ref = cycle[(i + 1) % len(cycle)]
824+
if (source_container_ref, required_container_ref) not in self.oriented_mst_edges:
825+
suboptimal_constraints.append((source_container_ref, required_container_ref))
826+
827+
# Tier 1: auto-generated and wrong direction from optimal solution
828+
for source_ref, required_ref in suboptimal_constraints:
829+
if self.requires_constraint_graph.edges[source_ref, required_ref].get("is_auto", False):
830+
if (required_ref, source_ref) in self.oriented_mst_edges:
831+
return source_ref, required_ref
832+
833+
# Tier 2: auto-generated and redundant (covered by other constraints)
834+
for source_ref, required_ref in suboptimal_constraints:
835+
if self.requires_constraint_graph.edges[source_ref, required_ref].get("is_auto", False):
836+
return source_ref, required_ref
837+
838+
# Tier 3: user-defined and wrong direction from optimal solution
839+
for source_ref, required_ref in suboptimal_constraints:
840+
if (required_ref, source_ref) in self.oriented_mst_edges:
841+
return source_ref, required_ref
842+
843+
# Tier 4: user-defined and redundant (covered by other constraints)
844+
if suboptimal_constraints:
845+
return suboptimal_constraints[0]
846+
847+
raise RuntimeError(
848+
f"{type(self).__name__}: No removable constraint found in cycle {cycle}. This is a bug in NEAT."
849+
)
800850

801851
@cached_property
802852
def optimized_requires_constraint_graph(self) -> nx.DiGraph:
@@ -1000,6 +1050,80 @@ def _transitively_reduced_edges(self) -> set[tuple[ContainerReference, Container
10001050
# Return MST edges that survive reduction
10011051
return {e for e in reduced.edges() if e in self.oriented_mst_edges}
10021052

1053+
@cached_property
1054+
def missing_requires_constraints(
1055+
self,
1056+
) -> list[tuple[ViewReference, ContainerReference, ContainerReference]]:
1057+
"""Views with containers that are missing requires constraints needed for optimal query performance.
1058+
1059+
Each entry is a (view, source_container, required_container) tuple indicating that
1060+
source_container should have a requires constraint pointing to required_container.
1061+
"""
1062+
missing_requires_constraints: list[tuple[ViewReference, ContainerReference, ContainerReference]] = []
1063+
for view_ref in self.merged.views:
1064+
changes = self.get_requires_changes_for_view(view_ref)
1065+
if changes.status != RequiresChangeStatus.CHANGES_AVAILABLE:
1066+
continue
1067+
for source_container_ref, required_container_ref in changes.to_add:
1068+
missing_requires_constraints.append((view_ref, source_container_ref, required_container_ref))
1069+
return missing_requires_constraints
1070+
1071+
@cached_property
1072+
def suboptimal_requires_constraints(
1073+
self,
1074+
) -> list[tuple[ViewReference, ContainerReference, ContainerReference]]:
1075+
"""Views with containers that have suboptimal requires constraints that should be removed.
1076+
1077+
Each entry is a (view, source_container, required_container) tuple indicating that
1078+
the existing requires constraint from source_container to required_container is
1079+
redundant or wrongly oriented relative to the optimal structure.
1080+
"""
1081+
results: list[tuple[ViewReference, ContainerReference, ContainerReference]] = []
1082+
for view_ref in self.merged.views:
1083+
changes = self.get_requires_changes_for_view(view_ref)
1084+
if changes.status != RequiresChangeStatus.CHANGES_AVAILABLE:
1085+
continue
1086+
for source_container_ref, required_container_ref in changes.to_remove:
1087+
results.append((view_ref, source_container_ref, required_container_ref))
1088+
return results
1089+
1090+
@cached_property
1091+
def missing_reverse_relation_index_targets(
1092+
self,
1093+
) -> list[tuple[ResolvedReverseDirectRelation, tuple[str, BtreeIndex] | None]]:
1094+
"""Reverse direct relations missing a cursorable index on the target container property.
1095+
1096+
Returns:
1097+
List of tuples: (resolved_relation, existing_non_cursorable_index or None)
1098+
"""
1099+
targets: list[tuple[ResolvedReverseDirectRelation, tuple[str, BtreeIndex] | None]] = []
1100+
1101+
for resolved in self.resolved_reverse_direct_relations:
1102+
if not resolved.container or not resolved.container_property:
1103+
continue
1104+
if resolved.container_ref.space in COGNITE_SPACES:
1105+
continue
1106+
if not isinstance(resolved.container_property.type, DirectNodeRelation):
1107+
continue
1108+
if resolved.container_property.type.list:
1109+
continue
1110+
1111+
for index_id, index in (resolved.container.indexes or {}).items():
1112+
if not isinstance(index, BtreeIndex):
1113+
continue
1114+
if len(index.properties) != 1:
1115+
continue
1116+
if resolved.container_property_id not in index.properties:
1117+
continue
1118+
if index.cursorable:
1119+
break
1120+
targets.append((resolved, (index_id, index)))
1121+
break
1122+
else:
1123+
targets.append((resolved, None))
1124+
1125+
return targets
1126+
10031127
def get_requires_changes_for_view(self, view: ViewReference) -> RequiresChangesForView:
10041128
"""Get requires constraint changes needed to optimize a view.
10051129

cognite/neat/_data_model/_identifiers.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import hashlib
2+
13
from pydantic import HttpUrl, RootModel, ValidationError
24

5+
from cognite.neat._data_model.models.dms import ContainerReference
36
from cognite.neat._utils.auxiliary import local_import
47

58

@@ -59,3 +62,47 @@ def as_rdflib_namespace(self): # type: ignore[no-untyped-def]
5962
from rdflib import Namespace
6063

6164
return Namespace(self.root)
65+
66+
67+
class AutoIdentifier:
68+
"""Generates identifiers for auto-created CDF constraints and indexes.
69+
70+
CDF has a 43-character limit on constraint/index identifiers. This class
71+
ensures generated IDs stay within that limit while maintaining uniqueness.
72+
"""
73+
74+
MAX_LENGTH = 43
75+
SUFFIX = "__auto"
76+
HASH_LENGTH = 8
77+
# base_id + suffix must fit in MAX_LENGTH
78+
_MAX_BASE_NO_HASH = MAX_LENGTH - len(SUFFIX)
79+
# base_id + "_" + hash + suffix must fit in MAX_LENGTH
80+
_MAX_BASE_WITH_HASH = _MAX_BASE_NO_HASH - HASH_LENGTH - 1
81+
82+
@classmethod
83+
def make(cls, base_id: str) -> str:
84+
"""Generate an auto identifier, truncating with a hash if needed.
85+
86+
Args:
87+
base_id: The primary identifier (e.g., external_id or property_id).
88+
89+
Returns:
90+
For short base_ids (<=37 chars): "{base_id}__auto"
91+
For long base_ids (>37 chars): "{truncated_id}_{hash}__auto"
92+
"""
93+
if len(base_id) <= cls._MAX_BASE_NO_HASH:
94+
return f"{base_id}{cls.SUFFIX}"
95+
96+
hash_suffix = hashlib.sha256(base_id.encode()).hexdigest()[: cls.HASH_LENGTH]
97+
truncated_id = base_id[: cls._MAX_BASE_WITH_HASH]
98+
return f"{truncated_id}_{hash_suffix}{cls.SUFFIX}"
99+
100+
@classmethod
101+
def for_constraint(cls, destination: ContainerReference) -> str:
102+
"""Generate a constraint identifier for auto-generated requires constraints."""
103+
return cls.make(destination.external_id)
104+
105+
@classmethod
106+
def for_index(cls, property_id: str) -> str:
107+
"""Generate an index identifier for auto-generated indexes."""
108+
return cls.make(property_id)

cognite/neat/_data_model/rules/dms/_containers.py

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""Validators for checking containers in the data model."""
22

3+
from cognite.neat._data_model._fix import FixAction
4+
from cognite.neat._data_model.deployer.data_classes import RemovedField, SeverityType
5+
from cognite.neat._data_model.models.dms._references import ContainerReference
36
from cognite.neat._data_model.models.dms._view_property import ViewCorePropertyRequest
47
from cognite.neat._data_model.rules.dms._base import DataModelRule
58
from cognite.neat._issues import ConsistencyError
@@ -209,32 +212,62 @@ class RequiresConstraintCycle(DataModelRule):
209212
code = f"{BASE_CODE}-005"
210213
issue_type = ConsistencyError
211214
alpha = True # Still in development
215+
fixable = True
212216

213217
def validate(self) -> list[ConsistencyError]:
214218
errors: list[ConsistencyError] = []
215-
optimal_edges = self.validation_resources.oriented_mst_edges
216-
217219
for cycle in self.validation_resources.requires_constraint_cycles:
218220
cycle_str = " -> ".join(str(c) for c in cycle) + f" -> {cycle[0]!s}"
219-
220-
# Find edges in cycle that are NOT in optimal structure (these should be removed)
221-
edges_to_remove = []
222-
for i, container in enumerate(cycle):
223-
next_container = cycle[(i + 1) % len(cycle)]
224-
edge = (container, next_container)
225-
if edge not in optimal_edges:
226-
edges_to_remove.append(f"{container} -> {next_container}")
227-
228-
message = f"Requires constraints form a cycle: {cycle_str}"
229-
if edges_to_remove:
230-
message += f". Recommended removal: {', '.join(edges_to_remove)} (not in optimal structure)"
231-
221+
source_container_ref, required_container_ref = self.validation_resources.pick_cycle_constraint_to_remove(
222+
cycle
223+
)
232224
errors.append(
233225
ConsistencyError(
234-
message=message,
235-
fix="Remove one of the requires constraints to break the cycle",
226+
message=(
227+
f"Requires constraints form a cycle: {cycle_str}. This can be fixed by removing the requires "
228+
f"constraint on {source_container_ref!s} to {required_container_ref!s}"
229+
),
230+
fix="Remove the recommended requires constraint to break the cycle",
236231
code=self.code,
237232
)
238233
)
239234

240235
return errors
236+
237+
def fix(self) -> list[FixAction]:
238+
"""Return fix actions to break requires constraint cycles."""
239+
fix_actions: list[FixAction] = []
240+
# Overlapping cycles can share the same edge to remove. Dedup here
241+
# because each constraint only needs to be removed once.
242+
seen: set[tuple[ContainerReference, ContainerReference]] = set()
243+
244+
for cycle in self.validation_resources.requires_constraint_cycles:
245+
source_container_ref, required_container_ref = self.validation_resources.pick_cycle_constraint_to_remove(
246+
cycle
247+
)
248+
if (source_container_ref, required_container_ref) in seen:
249+
continue
250+
seen.add((source_container_ref, required_container_ref))
251+
252+
container = self.validation_resources.select_container(source_container_ref)
253+
if not container:
254+
continue
255+
for constraint_id, constraint_def in self.validation_resources.get_requires_constraints(container):
256+
if constraint_def.require != required_container_ref:
257+
continue
258+
fix_actions.append(
259+
FixAction(
260+
code=self.code,
261+
resource_id=source_container_ref,
262+
changes=(
263+
RemovedField(
264+
field_path=f"constraints.{constraint_id}",
265+
current_value=constraint_def,
266+
item_severity=SeverityType.WARNING,
267+
),
268+
),
269+
message="Removed requires constraint to break cycle",
270+
)
271+
)
272+
273+
return fix_actions

0 commit comments

Comments
 (0)