Skip to content

Commit 4afed39

Browse files
authored
handle migrated kind nodes in diff and merge logic (#6401)
* fix bug in rel create for migrated kind nodes * handle agnostic rels correctly * WIP on including migrated kinds in diff * small changes to diff calculation queries for properties of nodes with migrated kind/inheritance * expand unit test for migrated kind diff... big time * finish up giant unit test * use db_id instead of labels, will need it for merge updates * move is_node_kind_migration outside of existing diff queries * format * fix broken tests/query * update DiffCombiner with identifiers * undo some changes in DiffQueryParser that are no longer needed * comment placement fix * test and support for merging branch with migrated kind node * update unit test for is_node_kind_migration * add changelog * remove commented out code * handle migrated node that is later deleted * add cypher comment * remove unused property * update comment, remove unnecessary int() casting * linting * one more test * add migration to delete diffs b/c they must be recalculated * actually include the migration
1 parent f16e7c8 commit 4afed39

25 files changed

+1861
-193
lines changed

backend/infrahub/core/diff/calculator.py

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from infrahub.core.query.diff import (
88
DiffCalculationQuery,
99
DiffFieldPathsQuery,
10+
DiffMigratedKindNodesQuery,
1011
DiffNodePathsQuery,
1112
DiffPropertyPathsQuery,
1213
)
@@ -15,7 +16,7 @@
1516
from infrahub.log import get_logger
1617

1718
from .model.field_specifiers_map import NodeFieldSpecifierMap
18-
from .model.path import CalculatedDiffs
19+
from .model.path import CalculatedDiffs, DiffNode, DiffRoot, NodeIdentifier
1920

2021
log = get_logger()
2122

@@ -59,7 +60,7 @@ async def _run_diff_calculation_query(
5960
)
6061
log.info(f"Beginning one diff calculation query {limit=}, {offset=}")
6162
await diff_query.execute(db=self.db)
62-
log.info("Diff calculation query complete")
63+
log.info(f"Diff calculation query complete {limit=}, {offset=}")
6364
last_result = None
6465
for query_result in diff_query.get_results():
6566
diff_parser.read_result(query_result=query_result)
@@ -69,6 +70,56 @@ async def _run_diff_calculation_query(
6970
has_more_data = last_result.get_as_type("has_more_data", bool)
7071
offset += limit
7172

73+
async def _apply_kind_migrated_nodes(
74+
self, branch_diff: DiffRoot, calculation_request: DiffCalculationRequest
75+
) -> None:
76+
has_more_data = True
77+
offset = 0
78+
limit = config.SETTINGS.database.query_size_limit
79+
diff_nodes_by_identifier = {n.identifier: n for n in branch_diff.nodes}
80+
diff_nodes_to_add: list[DiffNode] = []
81+
while has_more_data:
82+
diff_query = await DiffMigratedKindNodesQuery.init(
83+
db=self.db,
84+
branch=calculation_request.diff_branch,
85+
base_branch=calculation_request.base_branch,
86+
diff_branch_from_time=calculation_request.branch_from_time,
87+
diff_from=calculation_request.from_time,
88+
diff_to=calculation_request.to_time,
89+
limit=limit,
90+
offset=offset,
91+
)
92+
log.info(f"Getting one batch of migrated kind nodes {limit=}, {offset=}")
93+
await diff_query.execute(db=self.db)
94+
log.info(f"Migrated kind nodes query complete {limit=}, {offset=}")
95+
last_result = None
96+
for migrated_kind_node in diff_query.get_migrated_kind_nodes():
97+
migrated_kind_identifier = NodeIdentifier(
98+
uuid=migrated_kind_node.uuid,
99+
kind=migrated_kind_node.kind,
100+
db_id=migrated_kind_node.db_id,
101+
)
102+
if migrated_kind_identifier in diff_nodes_by_identifier:
103+
diff_node = diff_nodes_by_identifier[migrated_kind_identifier]
104+
diff_node.is_node_kind_migration = True
105+
continue
106+
new_diff_node = DiffNode(
107+
identifier=migrated_kind_identifier,
108+
changed_at=migrated_kind_node.from_time,
109+
action=migrated_kind_node.action,
110+
is_node_kind_migration=True,
111+
attributes=[],
112+
relationships=[],
113+
)
114+
diff_nodes_by_identifier[migrated_kind_identifier] = new_diff_node
115+
diff_nodes_to_add.append(new_diff_node)
116+
last_result = migrated_kind_node
117+
has_more_data = False
118+
if last_result:
119+
has_more_data = last_result.has_more_data
120+
offset += limit
121+
branch_diff.nodes.extend(diff_nodes_to_add)
122+
72123
async def calculate_diff(
73124
self,
74125
base_branch: Branch,
@@ -92,7 +143,7 @@ async def calculate_diff(
92143
)
93144
node_limit = int(config.SETTINGS.database.query_size_limit / 10)
94145
fields_limit = int(config.SETTINGS.database.query_size_limit / 3)
95-
properties_limit = int(config.SETTINGS.database.query_size_limit)
146+
properties_limit = config.SETTINGS.database.query_size_limit
96147

97148
calculation_request = DiffCalculationRequest(
98149
base_branch=base_branch,
@@ -132,7 +183,7 @@ async def calculate_diff(
132183
if base_branch.name != diff_branch.name:
133184
current_node_field_specifiers = diff_parser.get_current_node_field_specifiers()
134185
new_node_field_specifiers = diff_parser.get_new_node_field_specifiers()
135-
calculation_request = DiffCalculationRequest(
186+
base_calculation_request = DiffCalculationRequest(
136187
base_branch=base_branch,
137188
diff_branch=base_branch,
138189
branch_from_time=diff_branch_from_time,
@@ -146,7 +197,7 @@ async def calculate_diff(
146197
await self._run_diff_calculation_query(
147198
diff_parser=diff_parser,
148199
query_class=DiffNodePathsQuery,
149-
calculation_request=calculation_request,
200+
calculation_request=base_calculation_request,
150201
limit=node_limit,
151202
)
152203
log.info("Diff node-level calculation queries for base complete")
@@ -155,7 +206,7 @@ async def calculate_diff(
155206
await self._run_diff_calculation_query(
156207
diff_parser=diff_parser,
157208
query_class=DiffFieldPathsQuery,
158-
calculation_request=calculation_request,
209+
calculation_request=base_calculation_request,
159210
limit=fields_limit,
160211
)
161212
log.info("Diff field-level calculation queries for base complete")
@@ -164,17 +215,19 @@ async def calculate_diff(
164215
await self._run_diff_calculation_query(
165216
diff_parser=diff_parser,
166217
query_class=DiffPropertyPathsQuery,
167-
calculation_request=calculation_request,
218+
calculation_request=base_calculation_request,
168219
limit=properties_limit,
169220
)
170221
log.info("Diff property-level calculation queries for base complete")
171222

172223
log.info("Parsing calculated diff")
173224
diff_parser.parse(include_unchanged=include_unchanged)
174225
log.info("Calculated diff parsed")
226+
branch_diff = diff_parser.get_diff_root_for_branch(branch=diff_branch.name)
227+
await self._apply_kind_migrated_nodes(branch_diff=branch_diff, calculation_request=calculation_request)
175228
return CalculatedDiffs(
176229
base_branch_name=base_branch.name,
177230
diff_branch_name=diff_branch.name,
178231
base_branch_diff=diff_parser.get_diff_root_for_branch(branch=base_branch.name),
179-
diff_branch_diff=diff_parser.get_diff_root_for_branch(branch=diff_branch.name),
232+
diff_branch_diff=branch_diff,
180233
)

backend/infrahub/core/diff/combiner.py

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
EnrichedDiffRoot,
1515
EnrichedDiffs,
1616
EnrichedDiffSingleRelationship,
17+
NodeIdentifier,
1718
)
1819

1920

@@ -26,30 +27,35 @@ class NodePair:
2627
class DiffCombiner:
2728
def __init__(self) -> None:
2829
# {child_uuid: (parent_uuid, parent_rel_name)}
29-
self._child_parent_uuid_map: dict[str, tuple[str, str]] = {}
30-
self._parent_node_uuids: set[str] = set()
31-
self._earlier_nodes_by_uuid: dict[str, EnrichedDiffNode] = {}
32-
self._later_nodes_by_uuid: dict[str, EnrichedDiffNode] = {}
33-
self._common_node_uuids: set[str] = set()
30+
self._child_parent_identifier_map: dict[NodeIdentifier, tuple[NodeIdentifier, str]] = {}
31+
self._parent_node_identifiers: set[NodeIdentifier] = set()
32+
self._earlier_nodes_by_identifier: dict[NodeIdentifier, EnrichedDiffNode] = {}
33+
self._later_nodes_by_identifier: dict[NodeIdentifier, EnrichedDiffNode] = {}
34+
self._common_node_identifiers: set[NodeIdentifier] = set()
3435
self._diff_branch_name: str | None = None
3536

3637
def _initialize(self, earlier_diff: EnrichedDiffRoot, later_diff: EnrichedDiffRoot) -> None:
3738
self._diff_branch_name = earlier_diff.diff_branch_name
38-
self._child_parent_uuid_map = {}
39-
self._earlier_nodes_by_uuid = {}
40-
self._later_nodes_by_uuid = {}
41-
self._common_node_uuids = set()
39+
self._child_parent_identifier_map = {}
40+
self._earlier_nodes_by_identifier = {}
41+
self._later_nodes_by_identifier = {}
42+
self._common_node_identifiers = set()
4243
# map the parent of each node (if it exists), preference to the later diff
4344
for diff_root in (earlier_diff, later_diff):
4445
for child_node in diff_root.nodes:
4546
for parent_rel in child_node.relationships:
4647
for parent_node in parent_rel.nodes:
47-
self._child_parent_uuid_map[child_node.uuid] = (parent_node.uuid, parent_rel.name)
48+
self._child_parent_identifier_map[child_node.identifier] = (
49+
parent_node.identifier,
50+
parent_rel.name,
51+
)
4852
# UUIDs of all the parents, removing the stale parents from the earlier diff
49-
self._parent_node_uuids = {parent_tuple[0] for parent_tuple in self._child_parent_uuid_map.values()}
50-
self._earlier_nodes_by_uuid = {n.uuid: n for n in earlier_diff.nodes}
51-
self._later_nodes_by_uuid = {n.uuid: n for n in later_diff.nodes}
52-
self._common_node_uuids = set(self._earlier_nodes_by_uuid.keys()) & set(self._later_nodes_by_uuid.keys())
53+
self._parent_node_identifiers = {parent_tuple[0] for parent_tuple in self._child_parent_identifier_map.values()}
54+
self._earlier_nodes_by_identifier = {n.identifier: n for n in earlier_diff.nodes}
55+
self._later_nodes_by_identifier = {n.identifier: n for n in later_diff.nodes}
56+
self._common_node_identifiers = set(self._earlier_nodes_by_identifier.keys()) & set(
57+
self._later_nodes_by_identifier.keys()
58+
)
5359

5460
@property
5561
def diff_branch_name(self) -> str:
@@ -61,13 +67,13 @@ def _filter_nodes_to_keep(self, earlier_diff: EnrichedDiffRoot, later_diff: Enri
6167
filtered_node_pairs: list[NodePair] = []
6268
for earlier_node in earlier_diff.nodes:
6369
later_node: EnrichedDiffNode | None = None
64-
if earlier_node.uuid in self._common_node_uuids:
65-
later_node = self._later_nodes_by_uuid[earlier_node.uuid]
70+
if earlier_node.identifier in self._common_node_identifiers:
71+
later_node = self._later_nodes_by_identifier[earlier_node.identifier]
6672
# this is an out-of-date parent
6773
if (
6874
earlier_node.action is DiffAction.UNCHANGED
6975
and (later_node is None or later_node.action is DiffAction.UNCHANGED)
70-
and earlier_node.uuid not in self._parent_node_uuids
76+
and earlier_node.identifier not in self._parent_node_identifiers
7177
):
7278
continue
7379
if later_node is None:
@@ -79,15 +85,15 @@ def _filter_nodes_to_keep(self, earlier_diff: EnrichedDiffRoot, later_diff: Enri
7985
filtered_node_pairs.append(NodePair(earlier=earlier_node, later=later_node))
8086
for later_node in later_diff.nodes:
8187
# these have already been handled
82-
if later_node.uuid in self._common_node_uuids:
88+
if later_node.identifier in self._common_node_identifiers:
8389
continue
8490
filtered_node_pairs.append(NodePair(later=later_node))
8591
return filtered_node_pairs
8692

87-
def _get_parent_relationship_name(self, node_id: str) -> str | None:
88-
if node_id not in self._child_parent_uuid_map:
93+
def _get_parent_relationship_name(self, node_id: NodeIdentifier) -> str | None:
94+
if node_id not in self._child_parent_identifier_map:
8995
return None
90-
return self._child_parent_uuid_map[node_id][1]
96+
return self._child_parent_identifier_map[node_id][1]
9197

9298
def _should_include(self, earlier: DiffAction, later: DiffAction) -> bool:
9399
actions = {earlier, later}
@@ -284,7 +290,7 @@ def _combine_relationships(
284290
self,
285291
earlier_relationships: set[EnrichedDiffRelationship],
286292
later_relationships: set[EnrichedDiffRelationship],
287-
node_id: str,
293+
node_id: NodeIdentifier,
288294
) -> set[EnrichedDiffRelationship]:
289295
earlier_rels_by_name = {rel.name: rel for rel in earlier_relationships}
290296
later_rels_by_name = {rel.name: rel for rel in later_relationships}
@@ -365,7 +371,7 @@ def _combine_nodes(self, node_pairs: list[NodePair]) -> set[EnrichedDiffNode]:
365371
combined_relationships = self._combine_relationships(
366372
earlier_relationships=node_pair.earlier.relationships,
367373
later_relationships=node_pair.later.relationships,
368-
node_id=node_pair.later.uuid,
374+
node_id=node_pair.later.identifier,
369375
)
370376
if all(ca.action is DiffAction.UNCHANGED for ca in combined_attributes) and all(
371377
cr.action is DiffAction.UNCHANGED for cr in combined_relationships
@@ -380,14 +386,16 @@ def _combine_nodes(self, node_pairs: list[NodePair]) -> set[EnrichedDiffNode]:
380386
combined_attributes
381387
or combined_relationships
382388
or combined_conflict
383-
or node_pair.later.uuid in self._parent_node_uuids
389+
or node_pair.later.identifier in self._parent_node_identifiers
384390
):
385391
combined_nodes.add(
386392
EnrichedDiffNode(
387393
identifier=node_pair.later.identifier,
388394
label=node_pair.later.label,
389395
changed_at=node_pair.later.changed_at or node_pair.earlier.changed_at,
390396
action=combined_action,
397+
is_node_kind_migration=node_pair.earlier.is_node_kind_migration
398+
or node_pair.later.is_node_kind_migration,
391399
path_identifier=node_pair.later.path_identifier,
392400
attributes=combined_attributes,
393401
relationships=combined_relationships,
@@ -397,12 +405,12 @@ def _combine_nodes(self, node_pairs: list[NodePair]) -> set[EnrichedDiffNode]:
397405
return combined_nodes
398406

399407
def _link_child_nodes(self, nodes: Iterable[EnrichedDiffNode]) -> None:
400-
nodes_by_uuid: dict[str, EnrichedDiffNode] = {n.uuid: n for n in nodes}
401-
for child_node in nodes_by_uuid.values():
402-
if child_node.uuid not in self._child_parent_uuid_map:
408+
nodes_by_identifier: dict[NodeIdentifier, EnrichedDiffNode] = {n.identifier: n for n in nodes}
409+
for child_node in nodes_by_identifier.values():
410+
if child_node.identifier not in self._child_parent_identifier_map:
403411
continue
404-
parent_uuid, parent_rel_name = self._child_parent_uuid_map[child_node.uuid]
405-
parent_node = nodes_by_uuid[parent_uuid]
412+
parent_identifier, parent_rel_name = self._child_parent_identifier_map[child_node.identifier]
413+
parent_node = nodes_by_identifier[parent_identifier]
406414
parent_rel = child_node.get_relationship(name=parent_rel_name)
407415
parent_rel.nodes.add(parent_node)
408416

backend/infrahub/core/diff/enricher/hierarchy.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ async def _enrich_hierarchical_nodes(
9999

100100
current_node = node
101101
for ancestor in ancestors:
102-
ancestor_identifier = NodeIdentifier(uuid=ancestor.uuid, kind=ancestor.kind, labels=ancestor.labels)
102+
ancestor_identifier = NodeIdentifier(uuid=ancestor.uuid, kind=ancestor.kind, db_id=ancestor.db_id)
103103
parent_request = ParentNodeAddRequest(
104104
node_identifier=current_node.identifier,
105105
parent_identifier=ancestor_identifier,
@@ -146,13 +146,11 @@ async def _enrich_nodes_with_parent(
146146

147147
for peer in query.get_peers():
148148
source_identifier = NodeIdentifier(
149-
uuid=str(peer.source_id), kind=peer.source_kind, labels=peer.source_labels
149+
uuid=str(peer.source_id), kind=peer.source_kind, db_id=peer.source_db_id
150150
)
151151
parent_peers[source_identifier] = peer
152152
if parent_schema.has_parent_relationship:
153-
peer_identifier = NodeIdentifier(
154-
uuid=str(peer.peer_id), kind=peer.peer_kind, labels=peer.peer_labels
155-
)
153+
peer_identifier = NodeIdentifier(uuid=str(peer.peer_id), kind=peer.peer_kind, db_id=peer.peer_db_id)
156154
node_parent_with_parent_map[parent_schema.kind].append(peer_identifier)
157155

158156
# Check if the parent are already present
@@ -170,7 +168,7 @@ async def _enrich_nodes_with_parent(
170168
parent_rel = [rel for rel in schema_node.relationships if rel.kind == RelationshipKind.PARENT][0]
171169

172170
peer_identifier = NodeIdentifier(
173-
uuid=str(peer_parent.peer_id), kind=peer_parent.peer_kind, labels=peer_parent.peer_labels
171+
uuid=str(peer_parent.peer_id), kind=peer_parent.peer_kind, db_id=peer_parent.peer_db_id
174172
)
175173
parent_request = ParentNodeAddRequest(
176174
node_identifier=node.identifier,

backend/infrahub/core/diff/merger/merger.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@
33
from typing import TYPE_CHECKING
44

55
from infrahub.core import registry
6+
from infrahub.core.constants import DiffAction
67
from infrahub.core.diff.model.path import BranchTrackingId
7-
from infrahub.core.diff.query.merge import DiffMergePropertiesQuery, DiffMergeQuery, DiffMergeRollbackQuery
8+
from infrahub.core.diff.query.merge import (
9+
DiffMergeMigratedKindsQuery,
10+
DiffMergePropertiesQuery,
11+
DiffMergeQuery,
12+
DiffMergeRollbackQuery,
13+
)
814
from infrahub.log import get_logger
915

1016
if TYPE_CHECKING:
@@ -53,6 +59,16 @@ async def merge_graph(self, at: Timestamp) -> EnrichedDiffRoot:
5359
)
5460
log.info(f"Diff {latest_diff.uuid} retrieved")
5561
batch_num = 0
62+
migrated_kinds_id_map = {}
63+
for n in enriched_diff.nodes:
64+
if not n.is_node_kind_migration:
65+
continue
66+
if n.uuid not in migrated_kinds_id_map or (
67+
n.uuid in migrated_kinds_id_map and n.action is DiffAction.ADDED
68+
):
69+
# make sure that we use the ADDED db_id if it exists
70+
# it will not if a node was migrated and then deleted
71+
migrated_kinds_id_map[n.uuid] = n.identifier.db_id
5672
async for node_diff_dicts, property_diff_dicts in self.serializer.serialize_diff(diff=enriched_diff):
5773
if node_diff_dicts:
5874
log.info(f"Merging batch of nodes #{batch_num}")
@@ -62,6 +78,7 @@ async def merge_graph(self, at: Timestamp) -> EnrichedDiffRoot:
6278
at=at,
6379
target_branch=self.destination_branch,
6480
node_diff_dicts=node_diff_dicts,
81+
migrated_kinds_id_map=migrated_kinds_id_map,
6582
)
6683
await merge_query.execute(db=self.db)
6784
if property_diff_dicts:
@@ -72,10 +89,21 @@ async def merge_graph(self, at: Timestamp) -> EnrichedDiffRoot:
7289
at=at,
7390
target_branch=self.destination_branch,
7491
property_diff_dicts=property_diff_dicts,
92+
migrated_kinds_id_map=migrated_kinds_id_map,
7593
)
7694
await merge_properties_query.execute(db=self.db)
7795
log.info(f"Batch #{batch_num} merged")
7896
batch_num += 1
97+
migrated_kind_uuids = {n.identifier.uuid for n in enriched_diff.nodes if n.is_node_kind_migration}
98+
if migrated_kind_uuids:
99+
migrated_merge_query = await DiffMergeMigratedKindsQuery.init(
100+
db=self.db,
101+
branch=self.source_branch,
102+
at=at,
103+
target_branch=self.destination_branch,
104+
migrated_uuids=list(migrated_kind_uuids),
105+
)
106+
await migrated_merge_query.execute(db=self.db)
79107

80108
self.source_branch.branched_from = at.to_string()
81109
await self.source_branch.save(db=self.db)

backend/infrahub/core/diff/merger/serializer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def __init__(self, db: InfrahubDatabase, max_batch_size: int) -> None:
3939

4040
def _reset_caches(self) -> None:
4141
self._attribute_type_cache = {}
42+
self._conflicted_cardinality_one_relationships = set()
4243

4344
@property
4445
def source_branch_name(self) -> str:

0 commit comments

Comments
 (0)