Skip to content

Commit 5d4d794

Browse files
authored
Implement leaf node merging (#269)
Introduces a new optimization step that merges leaf nodes in the fetch graph to reduce the number of fetch steps, without degrading the query performance. This is achieved by merging leaf nodes with other nodes if they share the same service, response path, input type name, condition, and no argument conflicts exist. The snapshot differences are either fields in different order or steps being merged (always less than in the previous snapshot). Closes #268
1 parent c073d09 commit 5d4d794

File tree

9 files changed

+523
-380
lines changed

9 files changed

+523
-380
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/query-planner/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ graphql-tools = "0.4.0"
1111
graphql-parser = "0.4.1"
1212
thiserror = "2.0.12"
1313
petgraph = "0.8.0"
14+
rustc-hash = "2.1.1"
1415
tracing = { version = "0.1.41" }
1516
serde = "1.0.219"
1617
serde_json = { version = "1.0.140", features = ["preserve_order"] }

lib/query-planner/src/ast/hash.rs

Lines changed: 189 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
use std::collections::hash_map::DefaultHasher;
2-
use std::hash::{Hash, Hasher};
1+
use rustc_hash::{FxBuildHasher, FxHasher};
2+
use std::hash::{BuildHasher, Hash, Hasher};
33

44
use crate::ast::arguments::ArgumentsMap;
55
use crate::ast::operation::{OperationDefinition, VariableDefinition};
@@ -14,7 +14,7 @@ pub trait ASTHash {
1414
}
1515

1616
pub fn ast_hash(query: &OperationDefinition) -> u64 {
17-
let mut hasher = DefaultHasher::new();
17+
let mut hasher = FxHasher::default();
1818
query.ast_hash(&mut hasher);
1919
hasher.finish()
2020
}
@@ -111,32 +111,38 @@ impl ASTHash for &InlineFragmentSelection {
111111
}
112112

113113
impl ASTHash for ArgumentsMap {
114-
fn ast_hash<H: Hasher>(&self, hasher: &mut H) {
115-
// Order does not matter for hashing
116-
// The order of arguments does not matter.
117-
// To achieve order-insensitivity, we get all keys, sort them, and then
118-
// hash them with their values in that order.
119-
let mut keys: Vec<_> = self.keys().collect();
120-
keys.sort_unstable();
121-
for key in keys {
122-
key.hash(hasher);
123-
// We can unwrap here because we are iterating over existing keys
124-
self.get_argument(key).unwrap().ast_hash(hasher);
114+
fn ast_hash<H: Hasher>(&self, state: &mut H) {
115+
let mut combined_hash: u64 = 0;
116+
let build_hasher = FxBuildHasher;
117+
118+
// To achieve an order-independent hash, we hash each key-value pair
119+
// individually and then combine their hashes using XOR (^).
120+
// Since XOR is commutative, the final hash is not affected by the iteration order.
121+
for (key, value) in self.into_iter() {
122+
let mut key_val_hasher = build_hasher.build_hasher();
123+
key.hash(&mut key_val_hasher);
124+
value.ast_hash(&mut key_val_hasher);
125+
combined_hash ^= key_val_hasher.finish();
125126
}
127+
128+
state.write_u64(combined_hash);
126129
}
127130
}
128131

129132
impl ASTHash for Vec<VariableDefinition> {
130133
fn ast_hash<H: Hasher>(&self, hasher: &mut H) {
131-
// Order does not matter for hashing
132-
// The order of variables does not matter.
133-
// We do a similar thing as for ArgumentsMap,
134-
// we sort the variables by their names.
135-
let mut variables: Vec<_> = self.iter().collect();
136-
variables.sort_by_key(|f| &f.name);
137-
for var in variables {
138-
var.ast_hash(hasher);
134+
let mut combined_hash: u64 = 0;
135+
let build_hasher = FxBuildHasher;
136+
// To achieve an order-independent hash, we hash each key-value pair
137+
// individually and then combine their hashes using XOR (^).
138+
// Since XOR is commutative, the final hash is not affected by the iteration order.
139+
for variable in self.iter() {
140+
let mut local_hasher = build_hasher.build_hasher();
141+
variable.ast_hash(&mut local_hasher);
142+
combined_hash ^= local_hasher.finish();
139143
}
144+
145+
hasher.write_u64(combined_hash);
140146
}
141147
}
142148

@@ -196,3 +202,164 @@ impl ASTHash for Value {
196202
}
197203
}
198204
}
205+
206+
#[cfg(test)]
207+
mod tests {
208+
use super::*;
209+
use crate::ast::arguments::ArgumentsMap;
210+
use crate::ast::operation::{OperationDefinition, VariableDefinition};
211+
use crate::ast::selection_item::SelectionItem;
212+
use crate::ast::selection_set::{FieldSelection, SelectionSet};
213+
use crate::ast::value::Value;
214+
use crate::state::supergraph_state::{OperationKind, TypeNode};
215+
use std::collections::BTreeMap;
216+
217+
fn create_test_operation() -> OperationDefinition {
218+
let mut arguments = ArgumentsMap::new();
219+
arguments.add_argument("limit".to_string(), Value::Int(10));
220+
arguments.add_argument("sort".to_string(), Value::Enum("ASC".to_string()));
221+
222+
let mut nested_object = BTreeMap::new();
223+
nested_object.insert(
224+
"nestedKey".to_string(),
225+
Value::String("nestedValue".to_string()),
226+
);
227+
228+
arguments.add_argument("obj".to_string(), Value::Object(nested_object));
229+
230+
let field_selection = FieldSelection {
231+
name: "users".to_string(),
232+
alias: Some("all_users".to_string()),
233+
selections: SelectionSet {
234+
items: vec![
235+
SelectionItem::Field(FieldSelection {
236+
name: "id".to_string(),
237+
alias: None,
238+
selections: SelectionSet { items: vec![] },
239+
arguments: None,
240+
include_if: None,
241+
skip_if: None,
242+
}),
243+
SelectionItem::Field(FieldSelection {
244+
name: "name".to_string(),
245+
alias: None,
246+
selections: SelectionSet { items: vec![] },
247+
arguments: None,
248+
include_if: Some("includeName".to_string()),
249+
skip_if: None,
250+
}),
251+
],
252+
},
253+
arguments: Some(arguments),
254+
include_if: None,
255+
skip_if: Some("skipUsers".to_string()),
256+
};
257+
258+
let selection_set = SelectionSet {
259+
items: vec![SelectionItem::Field(field_selection)],
260+
};
261+
262+
let variable_definitions = vec![
263+
VariableDefinition {
264+
name: "skipUsers".to_string(),
265+
variable_type: TypeNode::NonNull(Box::new(TypeNode::Named("Boolean".to_string()))),
266+
default_value: Some(Value::Boolean(false)),
267+
},
268+
VariableDefinition {
269+
name: "includeName".to_string(),
270+
variable_type: TypeNode::Named("Boolean".to_string()),
271+
default_value: None,
272+
},
273+
];
274+
275+
OperationDefinition {
276+
operation_kind: Some(OperationKind::Query),
277+
selection_set,
278+
variable_definitions: Some(variable_definitions),
279+
name: Some("TestQuery".to_string()),
280+
}
281+
}
282+
283+
#[test]
284+
fn test_ast_hash_is_deterministic() {
285+
let operation = create_test_operation();
286+
287+
let hash1 = ast_hash(&operation);
288+
let hash2 = ast_hash(&operation);
289+
290+
// Test that the hash is consistent within the same run
291+
assert_eq!(hash1, hash2, "AST hash should be consistent");
292+
293+
// Snapshot test: compare against a known, pre-calculated hash.
294+
// If the hashing logic changes, this value will need to be updated.
295+
let expected_hash = 8854078506550230644;
296+
assert_eq!(
297+
hash1, expected_hash,
298+
"AST hash does not match the snapshot value. If this change is intentional, update the snapshot."
299+
);
300+
}
301+
302+
#[test]
303+
fn test_order_independent_hashing_for_arguments() {
304+
let mut args1 = ArgumentsMap::new();
305+
args1.add_argument("a".to_string(), Value::Int(1));
306+
args1.add_argument("b".to_string(), Value::Int(2));
307+
308+
let mut args2 = ArgumentsMap::new();
309+
args2.add_argument("b".to_string(), Value::Int(2));
310+
args2.add_argument("a".to_string(), Value::Int(1));
311+
312+
let mut hasher1 = FxHasher::default();
313+
args1.ast_hash(&mut hasher1);
314+
315+
let mut hasher2 = FxHasher::default();
316+
args2.ast_hash(&mut hasher2);
317+
318+
assert_eq!(
319+
hasher1.finish(),
320+
hasher2.finish(),
321+
"ArgumentsMap hashing should be order-independent"
322+
);
323+
}
324+
325+
#[test]
326+
fn test_order_independent_hashing_for_variables() {
327+
let vars1 = vec![
328+
VariableDefinition {
329+
name: "varA".to_string(),
330+
variable_type: TypeNode::Named("String".to_string()),
331+
default_value: None,
332+
},
333+
VariableDefinition {
334+
name: "varB".to_string(),
335+
variable_type: TypeNode::Named("Int".to_string()),
336+
default_value: Some(Value::Int(0)),
337+
},
338+
];
339+
340+
let vars2 = vec![
341+
VariableDefinition {
342+
name: "varB".to_string(),
343+
variable_type: TypeNode::Named("Int".to_string()),
344+
default_value: Some(Value::Int(0)),
345+
},
346+
VariableDefinition {
347+
name: "varA".to_string(),
348+
variable_type: TypeNode::Named("String".to_string()),
349+
default_value: None,
350+
},
351+
];
352+
353+
let mut hasher1 = FxHasher::default();
354+
vars1.ast_hash(&mut hasher1);
355+
356+
let mut hasher2 = FxHasher::default();
357+
vars2.ast_hash(&mut hasher2);
358+
359+
assert_eq!(
360+
hasher1.finish(),
361+
hasher2.finish(),
362+
"VariableDefinition vector hashing should be order-independent"
363+
);
364+
}
365+
}

lib/query-planner/src/planner/fetch/fetch_graph.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::planner::tree::query_tree_node::{MutationFieldPosition, QueryTreeNode
1212
use crate::planner::walker::path::OperationPath;
1313
use crate::planner::walker::pathfinder::can_satisfy_edge;
1414
use crate::state::supergraph_state::{SubgraphName, SupergraphState};
15+
use petgraph::algo::has_path_connecting;
1516
use petgraph::graph::EdgeReference;
1617
use petgraph::stable_graph::{EdgeIndex, NodeIndex, NodeIndices, NodeReferences, StableDiGraph};
1718
use petgraph::visit::EdgeRef;
@@ -58,6 +59,10 @@ impl FetchGraph {
5859
self.graph.edges_directed(index, Direction::Outgoing)
5960
}
6061

62+
pub fn is_descendant_of(&self, descendant: NodeIndex, ancestor: NodeIndex) -> bool {
63+
has_path_connecting(&self.graph, ancestor, descendant, None)
64+
}
65+
6166
pub fn step_indices(&self) -> NodeIndices<FetchStepData> {
6267
self.graph.node_indices()
6368
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
use petgraph::graph::NodeIndex;
2+
use tracing::{instrument, trace};
3+
4+
use crate::{
5+
ast::type_aware_selection::find_arguments_conflicts,
6+
planner::fetch::{
7+
error::FetchGraphError, fetch_graph::FetchGraph, fetch_step_data::FetchStepData,
8+
optimize::utils::perform_fetch_step_merge,
9+
},
10+
};
11+
12+
impl FetchStepData {
13+
pub fn can_merge_leafs(
14+
&self,
15+
self_index: NodeIndex,
16+
other_index: NodeIndex,
17+
other: &Self,
18+
fetch_graph: &FetchGraph,
19+
) -> bool {
20+
if self_index == other_index {
21+
return false;
22+
}
23+
24+
if self.service_name != other.service_name {
25+
return false;
26+
}
27+
28+
if self.response_path != other.response_path {
29+
return false;
30+
}
31+
32+
if self.input.type_name != other.input.type_name {
33+
return false;
34+
}
35+
36+
if self.condition != other.condition {
37+
return false;
38+
}
39+
40+
// otherwise we break the order of mutations
41+
if self.mutation_field_position != other.mutation_field_position {
42+
return false;
43+
}
44+
45+
// `other` must be a leaf node (no children).
46+
if fetch_graph.children_of(other_index).count() != 0 {
47+
return false;
48+
}
49+
50+
// We can't merge if one is a descendant of the other,
51+
// because there's a dependency between them,
52+
// that could lead to incorrect results.
53+
// Either input of "other" depends on the output of "self",
54+
// or input of "other" depends on the output of one of the steps in between.
55+
if fetch_graph.is_descendant_of(other_index, self_index) {
56+
return false;
57+
}
58+
59+
let input_conflicts = find_arguments_conflicts(&self.input, &other.input);
60+
61+
if !input_conflicts.is_empty() {
62+
return false;
63+
}
64+
65+
true
66+
}
67+
}
68+
69+
impl FetchGraph {
70+
#[instrument(level = "trace", skip_all)]
71+
/// This optimization is about merging leaf nodes in the fetch nodes with other nodes.
72+
/// It reduces the number of fetch steps, without degrading the query performance.
73+
/// The query performance is not degraded, because the leaf node has no children,
74+
/// meaning the overall depth (amount of parallel layers) is not increased.
75+
pub(crate) fn merge_leafs(&mut self) -> Result<(), FetchGraphError> {
76+
while let Some((target_idx, leaf_idx)) = self.find_merge_candidate()? {
77+
perform_fetch_step_merge(target_idx, leaf_idx, self)?;
78+
}
79+
80+
Ok(())
81+
}
82+
83+
fn find_merge_candidate(&self) -> Result<Option<(NodeIndex, NodeIndex)>, FetchGraphError> {
84+
let all_nodes: Vec<NodeIndex> = self
85+
.graph
86+
.node_indices()
87+
.filter(|&idx| self.root_index != Some(idx))
88+
.collect();
89+
90+
for &target_idx in &all_nodes {
91+
for &leaf_idx in &all_nodes {
92+
let target_data = self.get_step_data(target_idx)?;
93+
let leaf_data = self.get_step_data(leaf_idx)?;
94+
95+
if target_data.can_merge_leafs(target_idx, leaf_idx, leaf_data, self) {
96+
trace!(
97+
"optimization found: merge leaf [{}] with [{}]",
98+
leaf_idx.index(),
99+
target_idx.index(),
100+
);
101+
return Ok(Some((target_idx, leaf_idx)));
102+
}
103+
}
104+
}
105+
106+
Ok(None)
107+
}
108+
}

0 commit comments

Comments
 (0)