From 278701fcf20800e24c363e87bb768189272e07d2 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 3 Dec 2025 15:35:39 -0500 Subject: [PATCH] new transform (with feature flag `enable_cast_elimination`) that eliminates varchar_to_text --- misc/python/materialize/mzcompose/__init__.py | 1 + src/expr/src/scalar.rs | 12 +++ src/repr/src/optimize.rs | 4 + src/sql/src/plan/statement/ddl.rs | 1 + src/sql/src/plan/statement/dml.rs | 1 + src/sql/src/session/vars/definitions.rs | 9 +- src/transform/src/eliminate_noop_casts.rs | 100 ++++++++++++++++++ src/transform/src/lib.rs | 9 +- src/transform/tests/test_runner.rs | 7 +- 9 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 src/transform/src/eliminate_noop_casts.rs diff --git a/misc/python/materialize/mzcompose/__init__.py b/misc/python/materialize/mzcompose/__init__.py index 926c9f72d0122..a911be9cd955e 100644 --- a/misc/python/materialize/mzcompose/__init__.py +++ b/misc/python/materialize/mzcompose/__init__.py @@ -83,6 +83,7 @@ def get_minimal_system_parameters( "true" if version >= MzVersion.parse_mz("v0.132.0-dev") else "false" ), "enable_alter_swap": "true", + "enable_cast_elimination": "true", "enable_columnar_lgalloc": "false", "enable_columnation_lgalloc": "false", "enable_compute_correction_v2": "true", diff --git a/src/expr/src/scalar.rs b/src/expr/src/scalar.rs index 974e2cd1c7eeb..2d96470c929f2 100644 --- a/src/expr/src/scalar.rs +++ b/src/expr/src/scalar.rs @@ -1568,6 +1568,18 @@ impl MirScalarExpr { /* #endregion */ } + pub fn elimimate_noop_casts(&mut self) -> Result<(), RecursionLimitError> { + self.visit_mut_post(&mut |e| match e { + MirScalarExpr::CallUnary { + func: UnaryFunc::CastVarCharToString(_), + expr, + } => { + *e = expr.take(); + } + _ => {} + }) + } + /// Decompose an IsNull expression into a disjunction of /// simpler expressions. 
/// diff --git a/src/repr/src/optimize.rs b/src/repr/src/optimize.rs index 3ad66e4482fb4..6471767cdf698 100644 --- a/src/repr/src/optimize.rs +++ b/src/repr/src/optimize.rs @@ -130,8 +130,12 @@ optimizer_feature_flags!({ enable_less_reduce_in_eqprop: bool, // See the feature flag of the same name. enable_dequadratic_eqprop_map: bool, + // See the feature flag of the same name. enable_fast_path_plan_insights: bool, + // See the feature flag of the same name. enable_repr_typecheck: bool, + // See the feature flag of the same name. + enable_cast_elimination: bool, }); /// A trait used to implement layered config construction. diff --git a/src/sql/src/plan/statement/ddl.rs b/src/sql/src/plan/statement/ddl.rs index e0c6309bc3cbf..b58f3b981d2ae 100644 --- a/src/sql/src/plan/statement/ddl.rs +++ b/src/sql/src/plan/statement/ddl.rs @@ -4914,6 +4914,7 @@ pub fn unplan_create_cluster( enable_eq_classes_withholding_errors: _, enable_fast_path_plan_insights: _, enable_repr_typecheck: _, + enable_cast_elimination: _, } = optimizer_feature_overrides; // The ones from above that don't occur below are not wired up to cluster features. 
let features_extracted = ClusterFeatureExtracted { diff --git a/src/sql/src/plan/statement/dml.rs b/src/sql/src/plan/statement/dml.rs index 88c72b07848d0..9fe474bd7f501 100644 --- a/src/sql/src/plan/statement/dml.rs +++ b/src/sql/src/plan/statement/dml.rs @@ -639,6 +639,7 @@ impl TryFrom for ExplainConfig { enable_eq_classes_withholding_errors: Default::default(), enable_fast_path_plan_insights: Default::default(), enable_repr_typecheck: Default::default(), + enable_cast_elimination: Default::default(), }, }) } diff --git a/src/sql/src/session/vars/definitions.rs b/src/sql/src/session/vars/definitions.rs index 84c83283e9a4c..db397b5133726 100644 --- a/src/sql/src/session/vars/definitions.rs +++ b/src/sql/src/session/vars/definitions.rs @@ -2220,7 +2220,7 @@ feature_flags!( }, { name: enable_repr_typecheck, - desc: "Enable typechecking using representation types", + desc: "Enable typechecking using representation types.", default: false, enable_for_item_parsing: false, }, @@ -2230,6 +2230,12 @@ feature_flags!( default: false, enable_for_item_parsing: false, }, + { + name: enable_cast_elimination, + desc: "Allow the optimizer to eliminate noop casts between values of equivalent representation types.", + default: false, + enable_for_item_parsing: false, + }, ); impl From<&super::SystemVars> for OptimizerFeatures { @@ -2254,6 +2260,7 @@ impl From<&super::SystemVars> for OptimizerFeatures { enable_eq_classes_withholding_errors: vars.enable_eq_classes_withholding_errors(), enable_fast_path_plan_insights: vars.enable_fast_path_plan_insights(), enable_repr_typecheck: vars.enable_repr_typecheck(), + enable_cast_elimination: vars.enable_cast_elimination(), } } } diff --git a/src/transform/src/eliminate_noop_casts.rs b/src/transform/src/eliminate_noop_casts.rs new file mode 100644 index 0000000000000..602b18bd7f088 --- /dev/null +++ b/src/transform/src/eliminate_noop_casts.rs @@ -0,0 +1,100 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. 
+// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +//! SQL types distinguish between `varchar` and `varchar(n)` and `text` as +//! types, but they are all represented using `Datum::String` at runtime. +//! This transform eliminates noop casts between values of equivalent +//! representation types. + +use mz_expr::MirRelationExpr; +use mz_ore::soft_assert_or_log; + +use crate::{TransformCtx, TransformError}; + +/// A transform that eliminates noop casts between values of equivalent representation types. +#[derive(Debug)] +pub struct EliminateNoopCasts; + +impl crate::Transform for EliminateNoopCasts { + fn name(&self) -> &'static str { + "EliminateNoopCasts" + } + + fn actually_perform_transform( + &self, + relation: &mut MirRelationExpr, + ctx: &mut TransformCtx, + ) -> Result<(), TransformError> { + soft_assert_or_log!( + ctx.features.enable_cast_elimination, + "cast elimination is not enabled but the pass ran anyway" + ); + + // Descend the AST, eliminating noop casts in scalar expressions. + let mut todo = vec![&mut *relation]; + while let Some(expr) = todo.pop() { + match expr { + MirRelationExpr::Constant { .. } + | MirRelationExpr::Get { .. } + | MirRelationExpr::Let { .. } + | MirRelationExpr::LetRec { .. } + | MirRelationExpr::Project { .. } + | MirRelationExpr::Union { .. } + | MirRelationExpr::Threshold { .. } + | MirRelationExpr::Negate { .. } => { + // No scalar expressions to rewrite + } + MirRelationExpr::Map { scalars: exprs, .. } + | MirRelationExpr::FlatMap { exprs, .. } + | MirRelationExpr::Filter { + predicates: exprs, .. + } => { + for e in exprs.iter_mut() { + e.elimimate_noop_casts().map_err(TransformError::from)?; + } + } + MirRelationExpr::Join { + equivalences: vecexprs, + ..
+ } + | MirRelationExpr::ArrangeBy { keys: vecexprs, .. } => { + for exprs in vecexprs.iter_mut() { + for e in exprs.iter_mut() { + e.elimimate_noop_casts().map_err(TransformError::from)?; + } + } + } + MirRelationExpr::Reduce { + group_key: exprs, + aggregates, + .. + } => { + for e in exprs.iter_mut() { + e.elimimate_noop_casts().map_err(TransformError::from)?; + } + + for agg in aggregates.iter_mut() { + agg.expr + .elimimate_noop_casts() + .map_err(TransformError::from)?; + } + } + MirRelationExpr::TopK { limit, .. } => { + if let Some(limit) = limit { + limit.elimimate_noop_casts().map_err(TransformError::from)?; + } + } + } + + todo.extend(expr.children_mut()) + } + + Ok(()) + } +} diff --git a/src/transform/src/lib.rs b/src/transform/src/lib.rs index e3a040e136465..346c9a51abdc0 100644 --- a/src/transform/src/lib.rs +++ b/src/transform/src/lib.rs @@ -40,6 +40,7 @@ use crate::canonicalize_mfp::CanonicalizeMfp; use crate::column_knowledge::ColumnKnowledge; use crate::dataflow::DataflowMetainfo; use crate::demand::Demand; +use crate::eliminate_noop_casts::EliminateNoopCasts; use crate::equivalence_propagation::EquivalencePropagation; use crate::fold_constants::FoldConstants; use crate::join_implementation::JoinImplementation; @@ -71,6 +72,7 @@ pub mod compound; pub mod cse; pub mod dataflow; pub mod demand; +pub mod eliminate_noop_casts; pub mod equivalence_propagation; pub mod fold_constants; pub mod fusion; @@ -747,6 +749,8 @@ impl Optimizer { let transforms: Vec> = transforms![ Box::new(Typecheck::new(ctx.typecheck()).strict_join_equivalences()), Box::new(ReprTypecheck::new(ctx.repr_typecheck()).strict_join_equivalences()); if ctx.features.enable_repr_typecheck, + Box::new(EliminateNoopCasts); if ctx.features.enable_cast_elimination, + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).strict_join_equivalences()); if ctx.features.enable_repr_typecheck && ctx.features.enable_cast_elimination, // 1. 
Structure-agnostic cleanup Box::new(normalize()), Box::new(NonNullRequirements::default()), @@ -956,8 +960,9 @@ impl Optimizer { } /// Builds a tiny optimizer, which is only suitable for optimizing fast-path queries. - pub fn fast_path_optimizer(_ctx: &mut TransformCtx) -> Self { - let transforms: Vec> = vec![ + pub fn fast_path_optimizer(ctx: &mut TransformCtx) -> Self { + let transforms: Vec> = transforms![ + Box::new(EliminateNoopCasts); if ctx.features.enable_cast_elimination, Box::new(canonicalization::ReduceScalars), Box::new(LiteralConstraints), Box::new(CanonicalizeMfp), diff --git a/src/transform/tests/test_runner.rs b/src/transform/tests/test_runner.rs index 2cf048eaeeacf..968ab5b1b5467 100644 --- a/src/transform/tests/test_runner.rs +++ b/src/transform/tests/test_runner.rs @@ -34,6 +34,7 @@ mod tests { use mz_transform::dataflow::{ DataflowMetainfo, optimize_dataflow_demand_inner, optimize_dataflow_filters_inner, }; + use mz_transform::eliminate_noop_casts::EliminateNoopCasts; use mz_transform::{Optimizer, Transform, TransformCtx, reprtypecheck, typecheck}; use proc_macro2::TokenTree; @@ -427,12 +428,16 @@ mod tests { Some(TEST_GLOBAL_ID), ); + let cast_elim = EliminateNoopCasts; let log_optimizer = Optimizer::logical_cleanup_pass(&mut transform_ctx, true); let phys_optimizer = Optimizer::physical_optimizer(&mut transform_ctx); dataflow = dataflow .into_iter() - .map(|(id, rel)| { + .map(|(id, mut rel)| { transform_ctx.set_global_id(id); + if transform_ctx.features.enable_cast_elimination { + cast_elim.transform(&mut rel, &mut transform_ctx).unwrap(); + } let local_mir_plan = log_optimizer .optimize(rel, &mut transform_ctx) .unwrap()