Skip to content

Commit fad721d

Browse files
authored
Merge pull request #4740 from opsmill/lgu-diff-benchmark
Add DiffAllPathsQuery benchmark
2 parents 27f4f86 + 30560eb commit fad721d

File tree

8 files changed

+285
-122
lines changed

8 files changed

+285
-122
lines changed

backend/infrahub/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,9 @@ class DatabaseSettings(BaseSettings):
197197
tls_insecure: bool = Field(default=False, description="Indicates if TLS certificates are verified")
198198
tls_ca_file: Optional[str] = Field(default=None, description="File path to CA cert or bundle in PEM format")
199199
query_size_limit: int = Field(
200-
default=5000,
200+
default=5_000,
201201
ge=1,
202-
le=20000,
202+
le=20_000,
203203
description="The max number of records to fetch in a single query before performing internal pagination.",
204204
)
205205
max_depth_search_hierarchy: int = Field(

backend/infrahub/core/query/relationship.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ async def query_init(self, db: InfrahubDatabase, **kwargs) -> None:
446446
r1 = f"{arrows.left.start}[r1:{self.rel_type} $rel_prop ]{arrows.left.end}"
447447
r2 = f"{arrows.right.start}[r2:{self.rel_type} $rel_prop ]{arrows.right.end}"
448448

449+
# Specifying relationship type might improve query performance here.
449450
query = """
450451
MATCH (s:Node { uuid: $source_id })-[]-(rl:Relationship {uuid: $rel_id})-[]-(d:Node { uuid: $destination_id })
451452
CREATE (s)%s(rl)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from dataclasses import dataclass
2+
3+
from infrahub.database import Neo4jRuntime
4+
from tests.helpers.constants import NEO4J_ENTERPRISE_IMAGE
5+
6+
7+
@dataclass
8+
class BenchmarkConfig:
9+
neo4j_image: str = NEO4J_ENTERPRISE_IMAGE
10+
neo4j_runtime: Neo4jRuntime = Neo4jRuntime.DEFAULT
11+
load_db_indexes: bool = False
12+
13+
def __str__(self) -> str:
14+
return f"{self.neo4j_image=} ; runtime: {self.neo4j_runtime} ; indexes: {self.load_db_indexes}"

backend/tests/helpers/query_benchmark/car_person_generators.py

Lines changed: 130 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,154 @@
11
import random
22
import uuid
3-
from typing import Optional, Tuple
3+
from typing import Any, Optional, Tuple
44

55
from infrahub.core import registry
6+
from infrahub.core.branch import Branch
7+
from infrahub.core.manager import NodeManager
68
from infrahub.core.node import Node
79
from tests.helpers.query_benchmark.data_generator import DataGenerator
810
from tests.helpers.query_benchmark.db_query_profiler import InfrahubDatabaseProfiler
911

1012

1113
class CarGenerator(DataGenerator):
1214
async def load_data(self, nb_elements: int) -> None:
13-
await self.load_cars(nb_elements)
15+
default_branch = await registry.get_branch(db=self.db)
16+
await self.load_cars(default_branch, nb_elements)
1417

15-
async def load_cars(self, nb_cars: int, persons: Optional[dict[str, Node]] = None) -> dict[str, Node]:
16-
"""
17-
Load cars and return a mapping car_name -> car_node.
18-
If 'persons' is specified, each car created is linked to a person.
19-
"""
18+
async def load_car_random_name(self, branch: Branch, nbr_seats: int, **kwargs: Any) -> Node:
19+
car_schema = registry.schema.get_node_schema(name="TestCar", branch=branch)
2020

21-
default_branch = await registry.get_branch(db=self.db)
22-
car_schema = registry.schema.get_node_schema(name="TestCar", branch=default_branch)
21+
short_id = str(uuid.uuid4())[:8]
22+
car_name = f"car-{short_id}"
23+
car_node = await Node.init(db=self.db, schema=car_schema, branch=branch)
24+
await car_node.new(db=self.db, name=car_name, nbr_seats=nbr_seats, **kwargs)
25+
26+
return await car_node.save(db=self.db)
2327

28+
async def load_cars(self, branch: Branch, nb_cars: int, **kwargs: Any) -> dict[str, Node]:
2429
cars = {}
2530
for _ in range(nb_cars):
26-
short_id = str(uuid.uuid4())[:8]
27-
car_name = f"car-{short_id}"
28-
car_node = await Node.init(db=self.db, schema=car_schema, branch=default_branch)
29-
if persons is not None:
30-
random_person = random.choice([persons[person_name] for person_name in persons])
31-
await car_node.new(db=self.db, name=car_name, nbr_seats=4, owner=random_person)
32-
else:
33-
await car_node.new(db=self.db, name=car_name, nbr_seats=4)
31+
car_node = await self.load_car_random_name(nbr_seats=4, branch=branch, **kwargs)
32+
cars[car_node.name.value] = car_node # type: ignore[attr-defined]
3433

35-
async with self.db.start_session():
36-
await car_node.save(db=self.db)
34+
return cars
3735

38-
cars[car_name] = car_node
36+
37+
class EngineGenerator(DataGenerator):
38+
async def load_data(self, nb_elements: int) -> None:
39+
default_branch = await registry.get_branch(db=self.db)
40+
await self.load_engines(default_branch, nb_elements)
41+
42+
async def load_engines(self, branch: Branch, nb_cars: int, **kwargs: Any) -> dict[str, Node]:
43+
engines = {}
44+
for _ in range(nb_cars):
45+
engine_node = await self.load_engine_random_name(branch=branch, **kwargs)
46+
engines[engine_node.name.value] = engine_node # type: ignore[attr-defined]
47+
48+
return engines
49+
50+
async def load_engine_random_name(self, branch: Branch, **kwargs: Any) -> Node:
51+
engine_schema = registry.schema.get_node_schema(name="TestEngine", branch=branch)
52+
53+
short_id = str(uuid.uuid4())[:8]
54+
engine_name = f"engine-{short_id}"
55+
engine_node = await Node.init(db=self.db, schema=engine_schema, branch=branch)
56+
await engine_node.new(db=self.db, name=engine_name, **kwargs)
57+
58+
return await engine_node.save(db=self.db)
59+
60+
61+
class CarWithDiffInSecondBranchGenerator(CarGenerator):
62+
persons: Optional[dict[str, Node]] # mapping of existing person names -> node
63+
nb_persons: int
64+
diff_ratio: float # 0.1 means 10% of the loaded cars are modified and another 10% are added in the diff branch (no cars are deleted)
65+
main_branch: Branch
66+
diff_branch: Branch
67+
68+
def __init__(
69+
self, db: InfrahubDatabaseProfiler, nb_persons: int, diff_ratio: float, main_branch: Branch, diff_branch: Branch
70+
) -> None:
71+
super().__init__(db)
72+
self.persons = None
73+
self.nb_persons = nb_persons
74+
self.diff_ratio = diff_ratio
75+
self.main_branch = main_branch
76+
self.diff_branch = diff_branch
77+
78+
async def init(self) -> None:
79+
"""Load persons, that will be later connected to generated cars."""
80+
self.persons = await PersonGenerator(self.db).load_persons(nb_persons=self.nb_persons)
81+
82+
async def load_cars_with_multiple_rels(self, branch: Branch, nb_cars: int) -> dict[str, Node]:
83+
assert self.persons is not None
84+
engine_generator = EngineGenerator(db=self.db)
85+
86+
cars = {}
87+
for _ in range(nb_cars):
88+
owner = random.choice([self.persons[person_name] for person_name in self.persons])
89+
drivers = random.choices([self.persons[person_name] for person_name in self.persons], k=nb_cars)
90+
engine = await engine_generator.load_engine_random_name(branch=branch)
91+
car = await self.load_car_random_name(
92+
branch=branch, nbr_seats=4, owner=owner, drivers=drivers, engine=engine
93+
)
94+
cars[car.name.value] = car # type: ignore[attr-defined]
3995

4096
return cars
4197

98+
async def load_data(self, nb_elements: int) -> None:
99+
"""
100+
Load cars in main branch, rebase diff branch on main branch, then load changes
101+
within diff branch according to a given ratio.
102+
Differences are:
103+
- Update some car attributes as well as 1:1, 1:N, N:N relationships.
104+
- Add new cars.
105+
Note that we do not delete cars within diff branch as it seems to take too long.
106+
"""
107+
108+
assert self.persons is not None, "'init' method should be called before 'load_data'"
109+
110+
if nb_elements == 0:
111+
return
112+
113+
# Load cars in main branch
114+
new_cars = await self.load_cars_with_multiple_rels(nb_cars=nb_elements, branch=self.main_branch)
115+
116+
# Integrate these new cars in diff branch
117+
await self.diff_branch.rebase(self.db)
118+
119+
# Retrieve car nodes from diff branch, including the ones not present in main branch
120+
# that were created by prior calls to `load_data`
121+
car_schema = registry.schema.get_node_schema(name="TestCar", branch=self.diff_branch)
122+
car_nodes = await NodeManager.query(db=self.db, schema=car_schema, branch=self.diff_branch)
123+
new_car_nodes = [car_node for car_node in car_nodes if car_node.name.value in new_cars]
124+
125+
nb_diff = max(int(nb_elements * self.diff_ratio), 1)
126+
127+
# Update cars in diff branch
128+
car_nodes_updatable = new_car_nodes
129+
car_nodes_to_update = random.choices(car_nodes_updatable, k=nb_diff)
130+
for i, car_node in enumerate(car_nodes_to_update):
131+
car_node.name.value = f"updated-car-{str(uuid.uuid4())[:8]}"
132+
133+
# Permute engines among car nodes to update, so it keeps one-to-one relationship between cars-engines
134+
new_engine = car_nodes_to_update[(i + 1) % len(car_nodes_to_update)].engine
135+
car_node.engine.update(db=self.db, data=new_engine)
136+
137+
# Update one-to-many relationship
138+
new_owner = random.choice([self.persons[person_name] for person_name in self.persons])
139+
car_node.owner.update(db=self.db, data=new_owner)
140+
141+
# Update many-to-many relationship
142+
new_drivers = random.choices([self.persons[person_name] for person_name in self.persons])
143+
car_node.drivers.update(db=self.db, data=new_drivers)
144+
145+
await car_node.save(db=self.db)
146+
147+
# Add a few cars in diff branch
148+
added_cars = await self.load_cars_with_multiple_rels(nb_cars=nb_diff, branch=self.diff_branch)
149+
150+
assert len(added_cars) == len(car_nodes_to_update) == nb_diff
151+
42152

43153
class PersonGenerator(DataGenerator):
44154
async def load_data(self, nb_elements: int) -> None:
@@ -77,42 +187,6 @@ async def load_persons(
77187
return persons_names_to_nodes
78188

79189

80-
class PersonFromExistingCarGenerator(PersonGenerator):
81-
cars: Optional[dict[str, Node]] # mapping of existing cars names -> node
82-
nb_cars: int
83-
84-
def __init__(self, db: InfrahubDatabaseProfiler, nb_cars: int) -> None:
85-
super().__init__(db)
86-
self.nb_cars = nb_cars
87-
self.cars = None
88-
89-
async def init(self) -> None:
90-
"""Load cars, that will be later connected to generated persons."""
91-
self.cars = await CarGenerator(self.db).load_cars(nb_cars=self.nb_cars)
92-
93-
async def load_data(self, nb_elements: int) -> None:
94-
assert self.cars is not None, "'init' method should be called before 'load_data'"
95-
await self.load_persons(nb_persons=nb_elements, cars=self.cars)
96-
97-
98-
class CarFromExistingPersonGenerator(CarGenerator):
99-
persons: Optional[dict[str, Node]] # mapping of existing cars names -> node
100-
nb_persons: int
101-
102-
def __init__(self, db: InfrahubDatabaseProfiler, nb_persons: int) -> None:
103-
super().__init__(db)
104-
self.nb_persons = nb_persons
105-
self.persons = None
106-
107-
async def init(self) -> None:
108-
"""Load persons, that will be later connected to generated cars."""
109-
self.persons = await PersonGenerator(self.db).load_persons(nb_persons=self.nb_persons)
110-
111-
async def load_data(self, nb_elements: int) -> None:
112-
assert self.persons is not None, "'init' method should be called before 'load_data'"
113-
await self.load_cars(nb_cars=nb_elements, persons=self.persons)
114-
115-
116190
class CarGeneratorWithOwnerHavingUniqueCar(CarGenerator):
117191
persons: list[Tuple[str, Node]] # mapping of existing cars names -> node
118192
nb_persons: int
@@ -154,33 +228,3 @@ async def load_data(self, nb_elements: int) -> None:
154228
await car_node.save(db=self.db)
155229

156230
self.nb_cars_loaded += nb_elements
157-
158-
159-
class CarAndPersonIsolatedGenerator(DataGenerator):
160-
def __init__(self, db: InfrahubDatabaseProfiler) -> None:
161-
super().__init__(db)
162-
self.car_generator: CarGenerator = CarGenerator(db)
163-
self.person_generator: PersonGenerator = PersonGenerator(db)
164-
165-
async def load_data(self, nb_elements: int) -> None:
166-
"""
167-
Load not connected cars and persons. Note that 'nb_elements' cars plus 'nb_elements' persons are loaded.
168-
"""
169-
170-
await self.car_generator.load_cars(nb_cars=nb_elements)
171-
await self.person_generator.load_persons(nb_persons=nb_elements)
172-
173-
174-
class CarAndPersonConnectedGenerator(DataGenerator):
175-
def __init__(self, db: InfrahubDatabaseProfiler) -> None:
176-
super().__init__(db)
177-
self.car_generator: CarGenerator = CarGenerator(db)
178-
self.person_generator: PersonGenerator = PersonGenerator(db)
179-
180-
async def load_data(self, nb_elements: int) -> None:
181-
"""
182-
Load connected cars and persons. Note that 'nb_elements' cars plus 'nb_elements' persons are loaded.
183-
"""
184-
185-
persons = await self.person_generator.load_persons(nb_persons=nb_elements)
186-
await self.car_generator.load_cars(nb_cars=nb_elements, persons=persons)

backend/tests/helpers/query_benchmark/data_generator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ async def load_data_and_profile(
6868
)
6969

7070
for i, nb_elem_to_load in enumerate(nb_elem_per_batch):
71+
print(f"Before loading batch {i=}. Current elements: {i * nb_elem_to_load=}")
7172
await data_generator.load_data(nb_elements=nb_elem_to_load)
7273
db_profiling_queries.increase_nb_elements_loaded(nb_elem_to_load)
7374
profile_memory = i % memory_profiling_rate == 0 if memory_profiling_rate is not None else False

backend/tests/query_benchmark/conftest.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import pytest
55

6+
from infrahub import config
67
from infrahub.core.constants import BranchSupportType
78
from infrahub.core.schema import SchemaRoot
89
from tests.helpers.query_benchmark.db_query_profiler import GraphProfileGenerator
@@ -40,6 +41,14 @@ async def car_person_schema_root() -> SchemaRoot:
4041
"peer": "TestPerson",
4142
"cardinality": "one",
4243
},
44+
{
45+
"name": "drivers",
46+
"label": "Who already drove the car",
47+
"peer": "TestPerson",
48+
"identifier": "testcar__drivers",
49+
"cardinality": "many",
50+
},
51+
{"name": "engine", "label": "engine of the car", "peer": "TestEngine", "cardinality": "one"},
4352
],
4453
},
4554
{
@@ -54,7 +63,32 @@ async def car_person_schema_root() -> SchemaRoot:
5463
{"name": "height", "kind": "Number", "optional": True},
5564
],
5665
"relationships": [
57-
{"name": "cars", "peer": "TestCar", "cardinality": "many"},
66+
{
67+
"name": "cars",
68+
"peer": "TestCar",
69+
"cardinality": "many",
70+
},
71+
{
72+
"name": "driven_cars",
73+
"label": "Already driven by the Person",
74+
"peer": "TestCar",
75+
"identifier": "testcar__drivers",
76+
"cardinality": "many",
77+
},
78+
],
79+
},
80+
{
81+
"name": "Engine",
82+
"namespace": "Test",
83+
"default_filter": "name__value",
84+
"display_labels": ["name__value"],
85+
"branch": BranchSupportType.AWARE.value,
86+
"uniqueness_constraints": [["name__value"]],
87+
"attributes": [
88+
{"name": "name", "kind": "Text", "unique": True},
89+
],
90+
"relationships": [
91+
{"name": "car", "peer": "TestCar", "cardinality": "one"},
5892
],
5993
},
6094
],
@@ -71,3 +105,11 @@ async def graph_generator() -> GraphProfileGenerator:
71105
"""
72106

73107
return GraphProfileGenerator()
108+
109+
110+
@pytest.fixture(scope="function")
111+
async def increase_query_size_limit() -> None:
112+
original_query_size_limit = config.SETTINGS.database.query_size_limit
113+
config.SETTINGS.database.query_size_limit = 1_000_000
114+
yield
115+
config.SETTINGS.database.query_size_limit = original_query_size_limit

0 commit comments

Comments
 (0)