Skip to content

Commit 7f8cf60

Browse files
authored
[optimizer] Reduce MRE::typ() calls in SemijoinIdempotence (#30944)
Ensures we only call `MRE::typ()` twice, rather than six times, in `attempt_join_simplification`. Changes the interface of `distinct_on_keys_of` to just take a `RelationType`, which should offer some additional savings. ### Motivation * This PR refactors existing code.
1 parent be1f626 commit 7f8cf60

File tree

1 file changed

+25
-18
lines changed

1 file changed

+25
-18
lines changed

src/transform/src/semijoin_idempotence.rs

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
//! which we will transfer to the columns of `D` thereby forming `C`.
2727
2828
use itertools::Itertools;
29+
use mz_repr::RelationType;
2930
use std::collections::BTreeMap;
3031

3132
use mz_expr::{Id, JoinInputMapper, LocalId, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT};
@@ -198,6 +199,9 @@ fn attempt_join_simplification(
198199
let input_mapper = JoinInputMapper::new(inputs);
199200

200201
if let Some((ltr, rtl)) = semijoin_bijection(inputs, equivalences) {
202+
// If semijoin_bijection returns `Some(...)`, then `inputs.len() == 2`.
203+
assert_eq!(inputs.len(), 2);
204+
201205
// Collect the `Get` identifiers each input might present as.
202206
let ids0 = as_filtered_get(&inputs[0], gets_behind_gets)
203207
.iter()
@@ -208,10 +212,12 @@ fn attempt_join_simplification(
208212
.map(|(id, _)| *id)
209213
.collect::<Vec<_>>();
210214

215+
// Record the types of the inputs, for use in both loops below.
216+
let typ0 = inputs[0].typ();
217+
let typ1 = inputs[1].typ();
218+
211219
// Consider replacing the second input for the benefit of the first.
212-
if distinct_on_keys_of(&inputs[1], &rtl)
213-
&& input_mapper.input_arity(1) == equivalences.len()
214-
{
220+
if distinct_on_keys_of(&typ1, &rtl) && input_mapper.input_arity(1) == equivalences.len() {
215221
for mut candidate in list_replacements(&inputs[1], let_replacements, gets_behind_gets) {
216222
if ids0.contains(&candidate.id) {
217223
if let Some(permutation) = validate_replacement(&ltr, &mut candidate) {
@@ -222,11 +228,15 @@ fn attempt_join_simplification(
222228
// The pushdown is for the benefit of CSE on the `A` expressions,
223229
// in the not uncommon case of nullable foreign keys in outer joins.
224230
// TODO: Discover the transform that would not require this code.
225-
let typ0 = inputs[0].typ().column_types;
226-
let typ1 = inputs[1].typ().column_types;
227231
let mut is_not_nulls = Vec::new();
228232
for (col0, col1) in ltr.iter() {
229-
if !typ1[*col1].nullable && typ0[*col0].nullable {
233+
// We are using the pre-computed types; recomputing the types here
234+
// might alter nullability. As of 2025-01-09, Gábor has not found that
235+
// happening. But for the future, notice that this could be a source of
236+
// inaccurate or inconsistent nullability information.
237+
if !typ1.column_types[*col1].nullable
238+
&& typ0.column_types[*col0].nullable
239+
{
230240
is_not_nulls.push(MirScalarExpr::Column(*col0).call_is_null().not())
231241
}
232242
}
@@ -243,9 +253,7 @@ fn attempt_join_simplification(
243253
}
244254
}
245255
// Consider replacing the first input for the benefit of the second.
246-
if distinct_on_keys_of(&inputs[0], &ltr)
247-
&& input_mapper.input_arity(0) == equivalences.len()
248-
{
256+
if distinct_on_keys_of(&typ0, &ltr) && input_mapper.input_arity(0) == equivalences.len() {
249257
for mut candidate in list_replacements(&inputs[0], let_replacements, gets_behind_gets) {
250258
if ids1.contains(&candidate.id) {
251259
if let Some(permutation) = validate_replacement(&rtl, &mut candidate) {
@@ -256,11 +264,11 @@ fn attempt_join_simplification(
256264
// The pushdown is for the benefit of CSE on the `A` expressions,
257265
// in the not uncommon case of nullable foreign keys in outer joins.
258266
// TODO: Discover the transform that would not require this code.
259-
let typ0 = inputs[0].typ().column_types;
260-
let typ1 = inputs[1].typ().column_types;
261267
let mut is_not_nulls = Vec::new();
262268
for (col1, col0) in rtl.iter() {
263-
if !typ0[*col0].nullable && typ1[*col1].nullable {
269+
if !typ0.column_types[*col0].nullable
270+
&& typ1.column_types[*col1].nullable
271+
{
264272
is_not_nulls.push(MirScalarExpr::Column(*col1).call_is_null().not())
265273
}
266274
}
@@ -422,7 +430,7 @@ fn list_replacements_join(
422430
// Each unique key could be a semijoin candidate.
423431
// We want to check that the join equivalences exactly match the key,
424432
// and then transcribe the corresponding columns in the other input.
425-
if distinct_on_keys_of(&inputs[1], &rtl) {
433+
if distinct_on_keys_of(&inputs[1].typ(), &rtl) {
426434
let columns = ltr
427435
.iter()
428436
.map(|(k0, k1)| (*k0, *k0, *k1))
@@ -452,7 +460,7 @@ fn list_replacements_join(
452460
// Each unique key could be a semijoin candidate.
453461
// We want to check that the join equivalences exactly match the key,
454462
// and then transcribe the corresponding columns in the other input.
455-
if distinct_on_keys_of(&inputs[0], &ltr) {
463+
if distinct_on_keys_of(&inputs[0].typ(), &ltr) {
456464
let columns = ltr
457465
.iter()
458466
.map(|(k0, k1)| (*k1, *k0, *k0))
@@ -484,10 +492,9 @@ fn list_replacements_join(
484492
results
485493
}
486494

487-
/// True iff some unique key of `input` is contained in the keys of `map`.
488-
fn distinct_on_keys_of(expr: &MirRelationExpr, map: &BTreeMap<usize, usize>) -> bool {
489-
expr.typ()
490-
.keys
495+
/// True iff some unique key of `typ` is contained in the keys of `map`.
496+
fn distinct_on_keys_of(typ: &RelationType, map: &BTreeMap<usize, usize>) -> bool {
497+
typ.keys
491498
.iter()
492499
.any(|key| key.iter().all(|k| map.contains_key(k)))
493500
}

0 commit comments

Comments
 (0)