|
10 | 10 | //! Transformation based on pushing demand information about columns toward sources. |
11 | 11 |
|
12 | 12 | use itertools::Itertools; |
13 | | -use mz_ore::assert_none; |
14 | 13 | use std::collections::{BTreeMap, BTreeSet}; |
15 | 14 |
|
| 15 | +use mz_expr::visit::Visit; |
16 | 16 | use mz_expr::{ |
17 | | - AggregateExpr, AggregateFunc, Id, JoinInputMapper, MirRelationExpr, MirScalarExpr, |
18 | | - RECURSION_LIMIT, |
| 17 | + AggregateExpr, AggregateFunc, Id, JoinInputMapper, MapFilterProject, MirRelationExpr, |
| 18 | + MirScalarExpr, RECURSION_LIMIT, |
19 | 19 | }; |
20 | 20 | use mz_ore::stack::{CheckedRecursion, RecursionGuard}; |
| 21 | +use mz_ore::{assert_none, soft_assert_or_log}; |
21 | 22 | use mz_repr::{Datum, Row}; |
22 | 23 |
|
23 | 24 | use crate::TransformCtx; |
@@ -365,4 +366,65 @@ impl Demand { |
365 | 366 | } |
366 | 367 | }) |
367 | 368 | } |
| 369 | + |
| 370 | + /// Checks the optimizer invariant that there are no more opportunities for projection pushdown, |
| 371 | + /// comparing against Demand. |
| 372 | + /// |
| 373 | + /// More specifically, we check that the MFPs directly on top of global Gets project at most |
| 374 | + /// those columns that `Demand` determines are demanded from that global Get. An exception is |
| 375 | + /// when there is an `ArrangeBy` on top of a global `Get`, in which case the relevant MFP might |
| 376 | + /// have been lifted away by `JoinImplementation` to enable index reuse. We skip checking the |
| 377 | + /// MFP in this case. |
| 378 | + /// |
| 379 | + /// This is meant to be called at the end of the MIR pipeline, where it can catch 3 types of |
| 380 | + /// issues: |
| 381 | + /// 1. If `ProjectionPushdown` is not as smart as `Demand`. |
| 382 | + /// 2. If some transform after the last run of `ProjectionPushdown` undoes something that |
| 383 | + /// `ProjectionPushdown` did. |
| 384 | + /// 3. If some transform after the last run of `ProjectionPushdown` opens up new opportunities |
| 385 | + /// for projection pushdown. |
| 386 | + pub fn soft_assert_no_more_projection_pushdown( |
| 387 | + relation: &MirRelationExpr, |
| 388 | + ) -> Result<(), crate::TransformError> { |
| 389 | + let mut relation = relation.clone(); |
| 390 | + let arity = relation.arity(); |
| 391 | + // Gather demanded columns of each of the Ids. |
| 392 | + let mut demand_on_ids = BTreeMap::new(); |
| 393 | + let demand = Demand::default(); |
| 394 | + demand.action(&mut relation, (0..arity).collect(), &mut demand_on_ids)?; |
| 395 | + // Check that MFPs on top of global Gets project at most those columns that Demand thinks |
| 396 | + // are demanded. |
| 397 | + // |
| 398 | + // We use `visit_pre_post`, because we'd like to control how we descend to children: we want |
| 399 | + // to jump over an MFP that we find at the current node, in order to avoid observing partial |
| 400 | + // MFPs, that are without their Projects. |
| 401 | + relation.visit_pre_post(&mut |expr| { |
| 402 | + let (mfp, expr) = MapFilterProject::extract_from_expression(expr); |
| 403 | + match expr { |
| 404 | + MirRelationExpr::Get { id: Id::Global(id), .. } => { |
| 405 | + let demand = demand_on_ids.get(&Id::Global(*id)).expect("`Demand` should have an opinion on all ids"); |
| 406 | + let actual_proj_above_get = mfp.projection.iter().filter(|c| **c < mfp.input_arity).collect_vec(); |
| 407 | + soft_assert_or_log!( |
| 408 | + actual_proj_above_get.iter().all(|c| demand.contains(c)), |
| 409 | + "Missed ProjectionPushdown opportunity: demand on {}: {:?}. actual_proj_above_get: {:?}. The whole plan:\n{}\n\nmfp:{},\nexpr:{}", |
| 410 | + id, demand, actual_proj_above_get, relation.pretty(), mfp, expr.pretty() |
| 411 | + ); |
| 412 | + // Don't descend to children. |
| 413 | + Some(vec![]) |
| 414 | + }, |
| 415 | + // If there is an ArrangeBy on top of a global Get, then a projection might have |
| 416 | + // been lifted away by JoinImplementation to re-use an index, so skip checking the |
| 417 | + // invariant in this case. |
| 418 | + MirRelationExpr::ArrangeBy { input, .. } if matches!(**input, MirRelationExpr::Get { id: Id::Global(_), .. }) => { |
| 419 | + Some(vec![]) |
| 420 | + }, |
| 421 | + // Just continue with the children of the operator that we found below the MFP. |
| 422 | + _ => { |
| 423 | + Some(expr.children().collect()) |
| 424 | + }, |
| 425 | + } |
| 426 | + |
| 427 | + }, &mut |_| {})?; |
| 428 | + Ok(()) |
| 429 | + } |
368 | 430 | } |
0 commit comments