Skip to content

Commit 33e7c57

Browse files
authored
diff OOM errors and performance (#5887)
* try getting the hierarchy for multiple nodes at once * batch getting display labels if over maximum node limit * a few more logging statements in the enrichers * split diff hierarchy linking query into smaller batches * split diff summary query in two * more logging, smaller pieces when saving a diff * Revert "try getting the hierarchy for multiple nodes at once" This reverts commit 2d624a9. * query update to improve performance of diff parent linking * add diff indices * clean up diff save method * improve diff save query performance * improve parent hierarchy enrichment performance * remove a few unused diff indices * fix typo * save query performance improvement * error message update
1 parent 6a8c7f3 commit 33e7c57

22 files changed

+474
-204
lines changed

backend/infrahub/core/diff/enricher/cardinality_one.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from infrahub.core.constants import NULL_VALUE, DiffAction, RelationshipCardinality
44
from infrahub.core.constants.database import DatabaseEdgeType
55
from infrahub.database import InfrahubDatabase
6+
from infrahub.log import get_logger
67

78
from ..model.path import (
89
CalculatedDiffs,
@@ -16,6 +17,8 @@
1617
if TYPE_CHECKING:
1718
from infrahub.core.schema import MainSchemaTypes
1819

20+
log = get_logger()
21+
1922

2023
class DiffCardinalityOneEnricher(DiffEnricherInterface):
2124
"""Clean up diffs for cardinality=one relationships to make them cleaner and more intuitive
@@ -34,13 +37,15 @@ def __init__(self, db: InfrahubDatabase):
3437

3538
async def enrich(self, enriched_diff_root: EnrichedDiffRoot, calculated_diffs: CalculatedDiffs) -> None:
3639
self._node_schema_map = {}
40+
log.info("Beginning cardinality-one diff enrichment...")
3741
for diff_node in enriched_diff_root.nodes:
3842
for relationship_group in diff_node.relationships:
3943
if (
4044
relationship_group.cardinality is RelationshipCardinality.ONE
4145
and len(relationship_group.relationships) > 0
4246
):
4347
self.consolidate_cardinality_one_diff_elements(diff_relationship=relationship_group)
48+
log.info("Cardinality-one diff enrichment complete.")
4449

4550
def _determine_action(self, previous_value: Any, new_value: Any) -> DiffAction:
4651
if previous_value == new_value:

backend/infrahub/core/diff/enricher/hierarchy.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,24 @@
77
from infrahub.core.query.relationship import RelationshipGetPeerQuery, RelationshipPeerData
88
from infrahub.core.schema import ProfileSchema
99
from infrahub.database import InfrahubDatabase
10+
from infrahub.log import get_logger
1011

1112
from ..model.path import (
1213
CalculatedDiffs,
1314
EnrichedDiffRoot,
1415
)
16+
from ..parent_node_adder import DiffParentNodeAdder, ParentNodeAddRequest
1517
from .interface import DiffEnricherInterface
1618

19+
log = get_logger()
20+
1721

1822
class DiffHierarchyEnricher(DiffEnricherInterface):
1923
"""Add hierarchy and parent/component nodes to diff even if the higher-level nodes are unchanged"""
2024

21-
def __init__(self, db: InfrahubDatabase):
25+
def __init__(self, db: InfrahubDatabase, parent_adder: DiffParentNodeAdder):
2226
self.db = db
27+
self.parent_adder = parent_adder
2328

2429
async def enrich(
2530
self, enriched_diff_root: EnrichedDiffRoot, calculated_diffs: CalculatedDiffs | None = None
@@ -28,6 +33,8 @@ async def enrich(
2833
# - A node has a relationship of kind parent
2934
# - A node is part of a hierarchy
3035

36+
log.info("Beginning hierarchical diff enrichment...")
37+
self.parent_adder.initialize(enriched_diff_root=enriched_diff_root)
3138
node_rel_parent_map: dict[str, list[str]] = defaultdict(list)
3239
node_hierarchy_map: dict[str, list[str]] = defaultdict(list)
3340

@@ -53,6 +60,7 @@ async def enrich(
5360

5461
await self._enrich_nodes_with_parent(enriched_diff_root=enriched_diff_root, node_map=node_rel_parent_map)
5562
await self._enrich_hierarchical_nodes(enriched_diff_root=enriched_diff_root, node_map=node_hierarchy_map)
63+
log.info("Hierarchical diff enrichment complete.")
5664

5765
async def _enrich_hierarchical_nodes(
5866
self,
@@ -63,6 +71,7 @@ async def _enrich_hierarchical_nodes(
6371

6472
# Retrieve the ID of all ancestors
6573
for kind, node_ids in node_map.items():
74+
log.info(f"Beginning hierarchy enrichment for {kind} node, num_nodes={len(node_ids)}...")
6675
hierarchy_schema = self.db.schema.get(
6776
name=kind, branch=enriched_diff_root.diff_branch_name, duplicate=False
6877
)
@@ -87,7 +96,7 @@ async def _enrich_hierarchical_nodes(
8796

8897
current_node = node
8998
for ancestor in ancestors:
90-
parent = enriched_diff_root.add_parent(
99+
parent_request = ParentNodeAddRequest(
91100
node_id=current_node.uuid,
92101
parent_id=str(ancestor.uuid),
93102
parent_kind=ancestor.kind,
@@ -97,6 +106,7 @@ async def _enrich_hierarchical_nodes(
97106
parent_rel_cardinality=parent_rel.cardinality,
98107
parent_rel_label=parent_rel.label or "",
99108
)
109+
parent = self.parent_adder.add_parent(parent_request=parent_request)
100110

101111
current_node = parent
102112

@@ -114,6 +124,7 @@ async def _enrich_nodes_with_parent(
114124

115125
# Query the UUID of the parent
116126
for kind, ids in node_map.items():
127+
log.info(f"Beginning parent enrichment for {kind} node, num_nodes={len(ids)}...")
117128
schema_node = self.db.schema.get(name=kind, branch=enriched_diff_root.diff_branch_name, duplicate=False)
118129

119130
parent_rel = [rel for rel in schema_node.relationships if rel.kind == RelationshipKind.PARENT][0]
@@ -138,15 +149,16 @@ async def _enrich_nodes_with_parent(
138149
# Check if the parent are already present
139150
# If parent is already in the list of node we need to add a relationship
140151
# If parent is not in the list of node, we need to add it
152+
diff_node_map = enriched_diff_root.get_node_map(node_uuids=set(parent_peers.keys()))
141153
for node_id, peer_parent in parent_peers.items():
142154
# TODO check if we can optimize this part to avoid querying this multiple times
143-
node = enriched_diff_root.get_node(node_uuid=node_id)
155+
node = diff_node_map[node_id]
144156
schema_node = self.db.schema.get(
145157
name=node.kind, branch=enriched_diff_root.diff_branch_name, duplicate=False
146158
)
147159
parent_rel = [rel for rel in schema_node.relationships if rel.kind == RelationshipKind.PARENT][0]
148160

149-
enriched_diff_root.add_parent(
161+
parent_request = ParentNodeAddRequest(
150162
node_id=node.uuid,
151163
parent_id=str(peer_parent.peer_id),
152164
parent_kind=peer_parent.peer_kind,
@@ -156,6 +168,7 @@ async def _enrich_nodes_with_parent(
156168
parent_rel_cardinality=parent_rel.cardinality,
157169
parent_rel_label=parent_rel.label or "",
158170
)
171+
self.parent_adder.add_parent(parent_request=parent_request)
159172

160173
if node_parent_with_parent_map:
161174
await self._enrich_nodes_with_parent(

backend/infrahub/core/diff/enricher/labels.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from infrahub.core.constants.database import DatabaseEdgeType
77
from infrahub.core.query.node import NodeGetKindQuery
88
from infrahub.database import InfrahubDatabase
9+
from infrahub.log import get_logger
910

1011
from ..model.path import (
1112
CalculatedDiffs,
@@ -17,6 +18,8 @@
1718
from ..payload_builder import get_display_labels
1819
from .interface import DiffEnricherInterface
1920

21+
log = get_logger()
22+
2023
PROPERTY_TYPES_WITH_LABELS = {DatabaseEdgeType.IS_RELATED, DatabaseEdgeType.HAS_OWNER, DatabaseEdgeType.HAS_SOURCE}
2124

2225

@@ -194,6 +197,7 @@ async def enrich(
194197
calculated_diffs: CalculatedDiffs | None = None,
195198
conflicts_only: bool = False,
196199
) -> None:
200+
log.info("Beginning display labels diff enrichment...")
197201
self._base_branch_name = enriched_diff_root.base_branch_name
198202
self._diff_branch_name = enriched_diff_root.diff_branch_name
199203
self._conflicts_only = conflicts_only
@@ -214,3 +218,4 @@ async def enrich(
214218
...
215219

216220
self._update_relationship_labels(enriched_diff=enriched_diff_root)
221+
log.info("Display labels diff enrichment complete.")

backend/infrahub/core/diff/enricher/path_identifier.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from infrahub.core.constants import PathType
22
from infrahub.core.path import DataPath
33
from infrahub.database import InfrahubDatabase
4+
from infrahub.log import get_logger
45

56
from ..model.path import CalculatedDiffs, EnrichedDiffRoot
67
from .interface import DiffEnricherInterface
78

9+
log = get_logger()
10+
811

912
class DiffPathIdentifierEnricher(DiffEnricherInterface):
1013
"""Add path identifiers to every element in the diff"""
@@ -20,6 +23,7 @@ def diff_branch_name(self) -> str:
2023
return self._diff_branch_name
2124

2225
async def enrich(self, enriched_diff_root: EnrichedDiffRoot, calculated_diffs: CalculatedDiffs) -> None:
26+
log.info("Beginning path identifier diff enrichment...")
2327
self._diff_branch_name = enriched_diff_root.diff_branch_name
2428
for node in enriched_diff_root.nodes:
2529
node_path = DataPath(
@@ -62,3 +66,4 @@ async def enrich(self, enriched_diff_root: EnrichedDiffRoot, calculated_diffs: C
6266
relationship_property_path = relationship_element_path.model_copy()
6367
relationship_property_path.property_name = relationship_property.property_type.value
6468
relationship_property.path_identifier = relationship_property_path.get_path(with_peer=False)
69+
log.info("Path identifier diff enrichment complete.")

backend/infrahub/core/diff/model/path.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,13 @@ def has_node(self, node_uuid: str) -> bool:
486486
except ValueError:
487487
return False
488488

489+
def get_node_map(self, node_uuids: set[str] | None = None) -> dict[str, EnrichedDiffNode]:
490+
node_map = {}
491+
for node in self.nodes:
492+
if node_uuids is None or node.uuid in node_uuids:
493+
node_map[node.uuid] = node
494+
return node_map
495+
489496
def get_all_conflicts(self) -> dict[str, EnrichedDiffConflict]:
490497
all_conflicts: dict[str, EnrichedDiffConflict] = {}
491498
for node in self.nodes:
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
from dataclasses import dataclass, field
2+
3+
from infrahub.core.constants import DiffAction, RelationshipCardinality
4+
5+
from .model.path import EnrichedDiffNode, EnrichedDiffRelationship, EnrichedDiffRoot
6+
7+
8+
@dataclass
9+
class ParentNodeAddRequest:
10+
node_id: str
11+
parent_id: str
12+
parent_kind: str
13+
parent_label: str
14+
parent_rel_name: str
15+
parent_rel_identifier: str
16+
parent_rel_cardinality: RelationshipCardinality
17+
parent_rel_label: str = field(default="")
18+
19+
20+
class DiffParentNodeAdder:
21+
def __init__(self) -> None:
22+
self._diff_root: EnrichedDiffRoot | None = None
23+
self._node_map: dict[str, EnrichedDiffNode] = {}
24+
25+
def initialize(self, enriched_diff_root: EnrichedDiffRoot) -> None:
26+
self._diff_root = enriched_diff_root
27+
self._node_map = enriched_diff_root.get_node_map()
28+
29+
def get_root(self) -> EnrichedDiffRoot:
30+
if not self._diff_root:
31+
raise RuntimeError("Must call initialize before using")
32+
return self._diff_root
33+
34+
def get_node(self, node_uuid: str) -> EnrichedDiffNode:
35+
return self._node_map[node_uuid]
36+
37+
def has_node(self, node_uuid: str) -> bool:
38+
return node_uuid in self._node_map
39+
40+
def add_node(self, node: EnrichedDiffNode) -> None:
41+
if node.uuid in self._node_map:
42+
return
43+
self._node_map[node.uuid] = node
44+
self.get_root().nodes.add(node)
45+
46+
def add_parent(self, parent_request: ParentNodeAddRequest) -> EnrichedDiffNode:
47+
if not self._diff_root:
48+
raise RuntimeError("Must call initialize before using")
49+
node = self.get_node(node_uuid=parent_request.node_id)
50+
if not self.has_node(node_uuid=parent_request.parent_id):
51+
parent = EnrichedDiffNode(
52+
uuid=parent_request.parent_id,
53+
kind=parent_request.parent_kind,
54+
label=parent_request.parent_label,
55+
action=DiffAction.UNCHANGED,
56+
changed_at=None,
57+
)
58+
self.add_node(parent)
59+
else:
60+
parent = self.get_node(node_uuid=parent_request.parent_id)
61+
62+
try:
63+
rel = node.get_relationship(name=parent_request.parent_rel_name)
64+
rel.nodes.add(parent)
65+
except ValueError:
66+
node.relationships.add(
67+
EnrichedDiffRelationship(
68+
name=parent_request.parent_rel_name,
69+
identifier=parent_request.parent_rel_identifier,
70+
label=parent_request.parent_rel_label,
71+
cardinality=parent_request.parent_rel_cardinality,
72+
changed_at=None,
73+
action=DiffAction.UNCHANGED,
74+
nodes={parent},
75+
)
76+
)
77+
78+
return parent

backend/infrahub/core/diff/payload_builder.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from typing import TYPE_CHECKING
44

5+
from infrahub import config
56
from infrahub.core.manager import NodeManager
67
from infrahub.core.registry import registry
78
from infrahub.exceptions import SchemaNotFoundError
@@ -26,8 +27,18 @@ async def get_display_labels_per_kind(
2627
if skip_missing_schema:
2728
return {}
2829
raise
29-
nodes = await NodeManager.get_many(ids=ids, fields=fields, db=db, branch=branch)
30-
return {node_id: await node.render_display_label(db=db) for node_id, node in nodes.items()}
30+
display_label_map: dict[str, str] = {}
31+
offset = 0
32+
limit = config.SETTINGS.database.query_size_limit
33+
while True:
34+
limited_ids = ids[offset : offset + limit]
35+
if not limited_ids:
36+
break
37+
node_map = await NodeManager.get_many(ids=limited_ids, fields=fields, db=db, branch=branch)
38+
for node_id, node in node_map.items():
39+
display_label_map[node_id] = await node.render_display_label(db=db)
40+
offset += limit
41+
return display_label_map
3142

3243

3344
async def get_display_labels(nodes: dict[str, dict[str, list[str]]], db: InfrahubDatabase) -> dict[str, dict[str, str]]:

0 commit comments

Comments
 (0)