diff --git a/misc/python/materialize/mzcompose/__init__.py b/misc/python/materialize/mzcompose/__init__.py index 926c9f72d0122..a37d38c72a7f4 100644 --- a/misc/python/materialize/mzcompose/__init__.py +++ b/misc/python/materialize/mzcompose/__init__.py @@ -106,7 +106,6 @@ def get_minimal_system_parameters( "enable_rbac_checks": "true", "enable_reduce_mfp_fusion": "true", "enable_refresh_every_mvs": "true", - "enable_repr_typecheck": "true", "enable_cluster_schedule_refresh": "true", "enable_sql_server_source": "true", "enable_statement_lifecycle_logging": "true", diff --git a/src/adapter/src/optimize/copy_to.rs b/src/adapter/src/optimize/copy_to.rs index 4816739bbf768..f159f58f9269d 100644 --- a/src/adapter/src/optimize/copy_to.rs +++ b/src/adapter/src/optimize/copy_to.rs @@ -31,7 +31,6 @@ use mz_transform::normalize_lets::normalize_lets; use mz_transform::reprtypecheck::{ SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, }; -use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use mz_transform::{StatisticsOracle, TransformCtx}; use timely::progress::Antichain; use tracing::warn; @@ -49,8 +48,6 @@ use crate::optimize::{ }; pub struct Optimizer { - /// A typechecking context to use throughout the optimizer pipeline. - typecheck_ctx: TypecheckContext, /// A representation typechecking context to use throughout the optimizer pipeline. repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. @@ -79,7 +76,6 @@ impl Optimizer { metrics: OptimizerMetrics, ) -> Self { Self { - typecheck_ctx: empty_context(), repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, @@ -175,7 +171,6 @@ impl Optimize for Optimizer { let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), @@ -353,7 +348,6 @@ impl<'s> Optimize>> for Optimizer { &df_builder, &*stats, &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), diff --git a/src/adapter/src/optimize/index.rs b/src/adapter/src/optimize/index.rs index 61acfd2222af5..57b6e7d128312 100644 --- a/src/adapter/src/optimize/index.rs +++ b/src/adapter/src/optimize/index.rs @@ -41,7 +41,6 @@ use mz_transform::notice::{IndexAlreadyExists, IndexKeyEmpty}; use mz_transform::reprtypecheck::{ SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, }; -use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use crate::optimize::dataflows::{ ComputeInstanceSnapshot, DataflowBuilder, ExprPrepStyle, prep_relation_expr, prep_scalar_expr, @@ -52,8 +51,6 @@ use crate::optimize::{ }; pub struct Optimizer { - /// A typechecking context to use throughout the optimizer pipeline. - typecheck_ctx: TypecheckContext, /// A representation typechecking context to use throughout the optimizer pipeline. repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. @@ -79,7 +76,6 @@ impl Optimizer { metrics: OptimizerMetrics, ) -> Self { Self { - typecheck_ctx: empty_context(), repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, @@ -182,7 +178,6 @@ impl Optimize for Optimizer { &df_builder, &mz_transform::EmptyStatisticsOracle, // TODO: wire proper stats &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), diff --git a/src/adapter/src/optimize/materialized_view.rs b/src/adapter/src/optimize/materialized_view.rs index 321c4e974423b..2ce20a3fdd369 100644 --- a/src/adapter/src/optimize/materialized_view.rs +++ b/src/adapter/src/optimize/materialized_view.rs @@ -45,7 +45,6 @@ use mz_transform::normalize_lets::normalize_lets; use mz_transform::reprtypecheck::{ SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, }; -use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use timely::progress::Antichain; use crate::optimize::dataflows::{ @@ -57,8 +56,6 @@ use crate::optimize::{ }; pub struct Optimizer { - /// A typechecking context to use throughout the optimizer pipeline. - typecheck_ctx: TypecheckContext, /// A representation typechecking context to use throughout the optimizer pipeline. repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. @@ -119,7 +116,6 @@ impl Optimizer { force_source_non_monotonic: BTreeSet, ) -> Self { Self { - typecheck_ctx: empty_context(), repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, @@ -202,7 +198,6 @@ impl Optimize for Optimizer { let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), @@ -292,7 +287,6 @@ impl Optimize for Optimizer { &df_builder, &mz_transform::EmptyStatisticsOracle, // TODO: wire proper stats &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), diff --git a/src/adapter/src/optimize/peek.rs b/src/adapter/src/optimize/peek.rs index 7c9a93b7da58d..4b22d90b7b824 100644 --- a/src/adapter/src/optimize/peek.rs +++ b/src/adapter/src/optimize/peek.rs @@ -28,7 +28,6 @@ use mz_transform::normalize_lets::normalize_lets; use mz_transform::reprtypecheck::{ SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, }; -use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use mz_transform::{StatisticsOracle, TransformCtx}; use timely::progress::Antichain; use tracing::debug_span; @@ -46,8 +45,6 @@ use crate::optimize::{ }; pub struct Optimizer { - /// A typechecking context to use throughout the optimizer pipeline. - typecheck_ctx: TypecheckContext, /// A representation typechecking context to use throughout the optimizer pipeline. repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. @@ -79,7 +76,6 @@ impl Optimizer { metrics: OptimizerMetrics, ) -> Self { Self { - typecheck_ctx: empty_context(), repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, @@ -186,7 +182,6 @@ impl Optimize for Optimizer { let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), @@ -342,7 +337,6 @@ impl<'s> Optimize>> for Optimizer { &df_builder, &*stats, &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), diff --git a/src/adapter/src/optimize/subscribe.rs b/src/adapter/src/optimize/subscribe.rs index ab0da543a96a6..1203f90738a37 100644 --- a/src/adapter/src/optimize/subscribe.rs +++ b/src/adapter/src/optimize/subscribe.rs @@ -29,7 +29,6 @@ use mz_transform::normalize_lets::normalize_lets; use mz_transform::reprtypecheck::{ SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, }; -use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use timely::progress::Antichain; use crate::CollectionIdBundle; @@ -43,8 +42,6 @@ use crate::optimize::{ }; pub struct Optimizer { - /// A typechecking context to use throughout the optimizer pipeline. - typecheck_ctx: TypecheckContext, /// A representation typechecking context to use throughout the optimizer pipeline. repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. @@ -99,7 +96,6 @@ impl Optimizer { metrics: OptimizerMetrics, ) -> Self { Self { - typecheck_ctx: empty_context(), repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, @@ -242,7 +238,6 @@ impl Optimize for Optimizer { // MIR ⇒ MIR optimization (local) let mut transform_ctx = TransformCtx::local( &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), @@ -286,7 +281,6 @@ impl Optimize for Optimizer { &df_builder, &mz_transform::EmptyStatisticsOracle, // TODO: wire proper stats &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, Some(&mut self.metrics), diff --git a/src/adapter/src/optimize/view.rs b/src/adapter/src/optimize/view.rs index f52e98f851b08..8e241b386abd4 100644 --- a/src/adapter/src/optimize/view.rs +++ b/src/adapter/src/optimize/view.rs @@ -29,7 +29,6 @@ use mz_transform::dataflow::DataflowMetainfo; use mz_transform::reprtypecheck::{ SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, }; -use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use crate::optimize::dataflows::{ExprPrepStyle, prep_relation_expr}; use crate::optimize::{ @@ -38,8 +37,6 @@ use crate::optimize::{ }; pub struct Optimizer<'a> { - /// A typechecking context to use throughout the optimizer pipeline. - typecheck_ctx: TypecheckContext, /// A representation typechecking context to use throughout the optimizer pipeline. repr_typecheck_ctx: ReprTypecheckContext, /// Optimizer config. @@ -58,7 +55,6 @@ pub struct Optimizer<'a> { impl<'a> Optimizer<'a> { pub fn new(config: OptimizerConfig, metrics: Option) -> Self { Self { - typecheck_ctx: empty_context(), repr_typecheck_ctx: empty_repr_context(), config, metrics, @@ -76,7 +72,6 @@ impl<'a> Optimizer<'a> { expr_prep_style: ExprPrepStyle<'a>, ) -> Optimizer<'a> { Self { - typecheck_ctx: empty_context(), repr_typecheck_ctx: empty_repr_context(), config, metrics, @@ -101,7 +96,6 @@ impl Optimize for Optimizer<'_> { let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &self.config.features, - &self.typecheck_ctx, &self.repr_typecheck_ctx, &mut df_meta, self.metrics.as_mut(), diff --git a/src/repr/src/optimize.rs b/src/repr/src/optimize.rs index 3ad66e4482fb4..fb2e5c64ac2fe 100644 --- a/src/repr/src/optimize.rs +++ b/src/repr/src/optimize.rs @@ -131,7 +131,6 @@ optimizer_feature_flags!({ // See the feature flag of the same name. enable_dequadratic_eqprop_map: bool, enable_fast_path_plan_insights: bool, - enable_repr_typecheck: bool, }); /// A trait used to implement layered config construction. diff --git a/src/sql/src/plan/statement/ddl.rs b/src/sql/src/plan/statement/ddl.rs index e0c6309bc3cbf..45316d3dac06c 100644 --- a/src/sql/src/plan/statement/ddl.rs +++ b/src/sql/src/plan/statement/ddl.rs @@ -4913,7 +4913,6 @@ pub fn unplan_create_cluster( enable_dequadratic_eqprop_map: _, enable_eq_classes_withholding_errors: _, enable_fast_path_plan_insights: _, - enable_repr_typecheck: _, } = optimizer_feature_overrides; // The ones from above that don't occur below are not wired up to cluster features. let features_extracted = ClusterFeatureExtracted { diff --git a/src/sql/src/plan/statement/dml.rs b/src/sql/src/plan/statement/dml.rs index 88c72b07848d0..551acc7e603e9 100644 --- a/src/sql/src/plan/statement/dml.rs +++ b/src/sql/src/plan/statement/dml.rs @@ -638,7 +638,6 @@ impl TryFrom for ExplainConfig { enable_dequadratic_eqprop_map: Default::default(), enable_eq_classes_withholding_errors: Default::default(), enable_fast_path_plan_insights: Default::default(), - enable_repr_typecheck: Default::default(), }, }) } diff --git a/src/sql/src/session/vars/definitions.rs b/src/sql/src/session/vars/definitions.rs index 84c83283e9a4c..f7cecb877f4e6 100644 --- a/src/sql/src/session/vars/definitions.rs +++ b/src/sql/src/session/vars/definitions.rs @@ -2218,12 +2218,6 @@ feature_flags!( default: false, enable_for_item_parsing: true, }, - { - name: enable_repr_typecheck, - desc: "Enable typechecking using representation types", - default: false, - enable_for_item_parsing: false, - }, { name: enable_frontend_peek_sequencing, // currently, changes only take effect for new sessions desc: "Enables the new peek sequencing code, which does most of its work in the Adapter Frontend instead of the Coordinator main task.", @@ -2253,7 +2247,6 @@ impl From<&super::SystemVars> for OptimizerFeatures { enable_dequadratic_eqprop_map: vars.enable_dequadratic_eqprop_map(), enable_eq_classes_withholding_errors: vars.enable_eq_classes_withholding_errors(), enable_fast_path_plan_insights: vars.enable_fast_path_plan_insights(), - enable_repr_typecheck: vars.enable_repr_typecheck(), } } } diff --git a/src/transform/src/lib.rs b/src/transform/src/lib.rs index e3a040e136465..564da46da62b7 100644 --- a/src/transform/src/lib.rs +++ b/src/transform/src/lib.rs @@ -57,7 +57,6 @@ use crate::redundant_join::RedundantJoin; use crate::reprtypecheck::{SharedContext as ReprSharedContext, Typecheck as ReprTypecheck}; use crate::semijoin_idempotence::SemijoinIdempotence; use crate::threshold_elision::ThresholdElision; -use crate::typecheck::{SharedContext, Typecheck}; use crate::union_cancel::UnionBranchCancellation; use crate::will_distinct::WillDistinct; @@ -92,7 +91,6 @@ pub mod redundant_join; pub mod reprtypecheck; pub mod semijoin_idempotence; pub mod threshold_elision; -pub mod typecheck; pub mod union_cancel; pub mod will_distinct; @@ -122,8 +120,6 @@ pub struct TransformCtx<'a> { pub stats: &'a dyn StatisticsOracle, /// Features passed to the enclosing `Optimizer`. pub features: &'a OptimizerFeatures, - /// Typechecking context. - pub typecheck_ctx: &'a SharedContext, /// Representation typechecking context. pub repr_typecheck_ctx: &'a ReprSharedContext, /// Transforms can use this field to communicate information outside the result plans. @@ -145,7 +141,6 @@ impl<'a> TransformCtx<'a> { /// [`MirRelationExpr`]. pub fn local( features: &'a OptimizerFeatures, - typecheck_ctx: &'a SharedContext, repr_typecheck_ctx: &'a ReprSharedContext, df_meta: &'a mut DataflowMetainfo, metrics: Option<&'a mut OptimizerMetrics>, @@ -156,7 +151,6 @@ impl<'a> TransformCtx<'a> { stats: &EmptyStatisticsOracle, global_id, features, - typecheck_ctx, repr_typecheck_ctx, df_meta, metrics, @@ -172,7 +166,6 @@ impl<'a> TransformCtx<'a> { indexes: &'a dyn IndexOracle, stats: &'a dyn StatisticsOracle, features: &'a OptimizerFeatures, - typecheck_ctx: &'a SharedContext, repr_typecheck_ctx: &'a ReprSharedContext, df_meta: &'a mut DataflowMetainfo, metrics: Option<&'a mut OptimizerMetrics>, @@ -183,17 +176,12 @@ impl<'a> TransformCtx<'a> { global_id: None, features, df_meta, - typecheck_ctx, repr_typecheck_ctx, metrics, last_hash: Default::default(), } } - fn typecheck(&self) -> SharedContext { - Arc::clone(self.typecheck_ctx) - } - fn repr_typecheck(&self) -> ReprSharedContext { Arc::clone(self.repr_typecheck_ctx) } @@ -745,8 +733,7 @@ impl Optimizer { #[deprecated = "Create an Optimize instance and call `optimize` instead."] pub fn logical_optimizer(ctx: &mut TransformCtx) -> Self { let transforms: Vec> = transforms![ - Box::new(Typecheck::new(ctx.typecheck()).strict_join_equivalences()), - Box::new(ReprTypecheck::new(ctx.repr_typecheck()).strict_join_equivalences()); if ctx.features.enable_repr_typecheck, + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).strict_join_equivalences()), // 1. Structure-agnostic cleanup Box::new(normalize()), Box::new(NonNullRequirements::default()), @@ -795,11 +782,10 @@ impl Optimizer { ], }), Box::new( - Typecheck::new(ctx.typecheck()) + ReprTypecheck::new(ctx.repr_typecheck()) .disallow_new_globals() - .strict_join_equivalences(), + .strict_join_equivalences() ), - Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, ]; Self { name: "logical", @@ -816,12 +802,7 @@ impl Optimizer { pub fn physical_optimizer(ctx: &mut TransformCtx) -> Self { // Implementation transformations let transforms: Vec> = transforms![ - Box::new( - Typecheck::new(ctx.typecheck()) - .disallow_new_globals() - .strict_join_equivalences(), - ), - Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()), // Considerations for the relationship between JoinImplementation and other transforms: // - there should be a run of LiteralConstraints before JoinImplementation lifts away // the Filters from the Gets; @@ -884,12 +865,7 @@ impl Optimizer { // - `CollectIndexRequests` needs a normalized plan. // https://github.com/MaterializeInc/database-issues/issues/6371 Box::new(fold_constants_fixpoint(true)), - Box::new( - Typecheck::new(ctx.typecheck()) - .disallow_new_globals() - .disallow_dummy(), - ), - Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().disallow_dummy().strict_join_equivalences()), ]; Self { name: "physical", @@ -904,18 +880,14 @@ impl Optimizer { /// The first instance of the typechecker in an optimizer pipeline should /// allow new globals (or it will crash when it encounters them). pub fn logical_cleanup_pass(ctx: &mut TransformCtx, allow_new_globals: bool) -> Self { - let mut typechecker = Typecheck::new(ctx.typecheck()).strict_join_equivalences(); - let mut repr_typechecker = ReprTypecheck::new(ctx.repr_typecheck()).strict_join_equivalences(); if !allow_new_globals { - typechecker = typechecker.disallow_new_globals(); repr_typechecker = repr_typechecker.disallow_new_globals(); } let transforms: Vec> = transforms![ - Box::new(typechecker), - Box::new(repr_typechecker); if ctx.features.enable_repr_typecheck, + Box::new(repr_typechecker), // Delete unnecessary maps. Box::new(fusion::Fusion), Box::new(Fixpoint { @@ -943,11 +915,10 @@ impl Optimizer { ], }), Box::new( - Typecheck::new(ctx.typecheck()) + ReprTypecheck::new(ctx.repr_typecheck()) .disallow_new_globals() - .strict_join_equivalences(), + .strict_join_equivalences() ), - Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, ]; Self { name: "logical_cleanup", diff --git a/src/transform/src/typecheck.rs b/src/transform/src/typecheck.rs deleted file mode 100644 index dabcf03b9587b..0000000000000 --- a/src/transform/src/typecheck.rs +++ /dev/null @@ -1,1598 +0,0 @@ -// Copyright Materialize, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -//! Check that the visible type of each query has not been changed - -use std::collections::BTreeMap; -use std::fmt::Write; -use std::sync::{Arc, Mutex}; - -use itertools::Itertools; -use mz_expr::explain::{HumanizedExplain, HumanizerMode}; -use mz_expr::{ - AggregateExpr, ColumnOrder, Id, JoinImplementation, LocalId, MirRelationExpr, MirScalarExpr, - RECURSION_LIMIT, non_nullable_columns, -}; -use mz_ore::soft_panic_or_log; -use mz_ore::stack::{CheckedRecursion, RecursionGuard, RecursionLimitError}; -use mz_repr::explain::{DummyHumanizer, ExprHumanizer}; -use mz_repr::{ColumnName, Row, SqlColumnType, SqlRelationType, SqlScalarBaseType, SqlScalarType}; - -/// Typechecking contexts as shared by various typechecking passes. -/// -/// We use a `RefCell` to ensure that contexts are shared by multiple typechecker passes. -/// Shared contexts help catch consistency issues. -pub type SharedContext = Arc>; - -/// Generates an empty context -pub fn empty_context() -> SharedContext { - Arc::new(Mutex::new(BTreeMap::new())) -} - -/// The possible forms of inconsistency/errors discovered during typechecking. -/// -/// Every variant has a `source` field identifying the MIR term that is home -/// to the error (though not necessarily the root cause of the error). -#[derive(Clone, Debug)] -pub enum TypeError<'a> { - /// Unbound identifiers (local or global) - Unbound { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The (unbound) identifier referenced - id: Id, - /// The type `id` was expected to have - typ: SqlRelationType, - }, - /// Dereference of a non-existent column - NoSuchColumn { - /// Expression with the bug - source: &'a MirRelationExpr, - /// Scalar expression that references an invalid column - expr: &'a MirScalarExpr, - /// The invalid column referenced - col: usize, - }, - /// A single column type does not match - MismatchColumn { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The column type we found (`sub` type) - got: SqlColumnType, - /// The column type we expected (`sup` type) - expected: SqlColumnType, - /// The difference between these types - diffs: Vec, - /// An explanatory message - message: String, - }, - /// Relation column types do not match - MismatchColumns { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The column types we found (`sub` type) - got: Vec, - /// The column types we expected (`sup` type) - expected: Vec, - /// The difference between these types - diffs: Vec, - /// An explanatory message - message: String, - }, - /// A constant row does not have the correct type - BadConstantRow { - /// Expression with the bug - source: &'a MirRelationExpr, - /// A constant row - got: Row, - /// The expected type (which that row does not have) - expected: Vec, - // TODO(mgree) with a good way to get the type of a Datum, we could give a diff here - }, - /// Projection of a non-existent column - BadProject { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The column projected - got: Vec, - /// The input columns (which don't have that column) - input_type: Vec, - }, - /// An equivalence class in a join was malformed - BadJoinEquivalence { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The join equivalences - got: Vec, - /// The problem with the join equivalences - message: String, - }, - /// TopK grouping by non-existent column - BadTopKGroupKey { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The bad column reference in the group key - k: usize, - /// The input columns (which don't have that column) - input_type: Vec, - }, - /// TopK ordering by non-existent column - BadTopKOrdering { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The ordering used - order: ColumnOrder, - /// The input columns (which don't work for that ordering) - input_type: Vec, - }, - /// LetRec bindings are malformed - BadLetRecBindings { - /// Expression with the bug - source: &'a MirRelationExpr, - }, - /// Local identifiers are shadowed - Shadowing { - /// Expression with the bug - source: &'a MirRelationExpr, - /// The id that was shadowed - id: Id, - }, - /// Recursion depth exceeded - Recursion { - /// The error that aborted recursion - error: RecursionLimitError, - }, - /// A dummy value was found - DisallowedDummy { - /// The expression with the dummy value - source: &'a MirRelationExpr, - }, -} - -impl<'a> From for TypeError<'a> { - fn from(error: RecursionLimitError) -> Self { - TypeError::Recursion { error } - } -} - -type Context = BTreeMap>; - -/// Characterizes differences between relation types -/// -/// Each constructor indicates a reason why some type `sub` was not a subtype of another type `sup` -#[derive(Clone, Debug, Hash)] -pub enum SqlRelationTypeDifference { - /// `sub` and `sup` don't have the same number of columns - Length { - /// Length of `sub` - len_sub: usize, - /// Length of `sup` - len_sup: usize, - }, - /// `sub` and `sup` differ at the indicated column - Column { - /// The column at which `sub` and `sup` differ - col: usize, - /// The difference between `sub` and `sup` - diff: SqlColumnTypeDifference, - }, -} - -/// Characterizes differences between individual column types -/// -/// Each constructor indicates a reason why some type `sub` was not a subtype of another type `sup` -/// There may be multiple reasons, e.g., `sub` may be missing fields and have fields of different types -#[derive(Clone, Debug, Hash)] -pub enum SqlColumnTypeDifference { - /// The `ScalarBaseType` of `sub` doesn't match that of `sup` - NotSubtype { - /// Would-be subtype - sub: SqlScalarType, - /// Would-be supertype - sup: SqlScalarType, - }, - /// `sub` was nullable but `sup` was not - Nullability { - /// Would-be subtype - sub: SqlColumnType, - /// Would-be supertype - sup: SqlColumnType, - }, - /// Both `sub` and `sup` are a list, map, array, or range, but `sub`'s element type differed from `sup`s - ElementType { - /// The type constructor (list, array, etc.) - ctor: String, - /// The difference in the element type - element_type: Box, - }, - /// `sub` and `sup` are both records, but `sub` is missing fields present in `sup` - RecordMissingFields { - /// The missing fields - missing: Vec, - }, - /// `sub` and `sup` are both records, but some fields in `sub` are not subtypes of fields in `sup` - RecordFields { - /// The differences, by field - fields: Vec<(ColumnName, SqlColumnTypeDifference)>, - }, -} - -impl SqlRelationTypeDifference { - /// Returns the same type difference, but ignoring nullability - /// - /// Returns `None` when _all_ of the differences are due to nullability - pub fn ignore_nullability(self) -> Option { - use SqlRelationTypeDifference::*; - - match self { - Length { .. } => Some(self), - Column { col, diff } => diff.ignore_nullability().map(|diff| Column { col, diff }), - } - } -} - -impl SqlColumnTypeDifference { - /// Returns the same type difference, but ignoring nullability - /// - /// Returns `None` when _all_ of the differences are due to nullability - pub fn ignore_nullability(self) -> Option { - use SqlColumnTypeDifference::*; - - match self { - Nullability { .. } => None, - NotSubtype { .. } | RecordMissingFields { .. } => Some(self), - ElementType { ctor, element_type } => { - element_type - .ignore_nullability() - .map(|element_type| ElementType { - ctor, - element_type: Box::new(element_type), - }) - } - RecordFields { fields } => { - let fields = fields - .into_iter() - .flat_map(|(col, diff)| diff.ignore_nullability().map(|diff| (col, diff))) - .collect::>(); - - if fields.is_empty() { - None - } else { - Some(RecordFields { fields }) - } - } - } - } -} - -/// Returns a list of differences that make `sub` not a subtype of `sup` -/// -/// This function returns an empty list when `sub` is a subtype of `sup` -pub fn relation_subtype_difference( - sub: &[SqlColumnType], - sup: &[SqlColumnType], -) -> Vec { - let mut diffs = Vec::new(); - - if sub.len() != sup.len() { - diffs.push(SqlRelationTypeDifference::Length { - len_sub: sub.len(), - len_sup: sup.len(), - }); - - // TODO(mgree) we could do an edit-distance computation to report more errors - return diffs; - } - - diffs.extend( - sub.iter() - .zip_eq(sup.iter()) - .enumerate() - .flat_map(|(col, (sub_ty, sup_ty))| { - column_subtype_difference(sub_ty, sup_ty) - .into_iter() - .map(move |diff| SqlRelationTypeDifference::Column { col, diff }) - }), - ); - - diffs -} - -/// Returns a list of differences that make `sub` not a subtype of `sup` -/// -/// This function returns an empty list when `sub` is a subtype of `sup` -pub fn column_subtype_difference( - sub: &SqlColumnType, - sup: &SqlColumnType, -) -> Vec { - let mut diffs = scalar_subtype_difference(&sub.scalar_type, &sup.scalar_type); - - if sub.nullable && !sup.nullable { - diffs.push(SqlColumnTypeDifference::Nullability { - sub: sub.clone(), - sup: sup.clone(), - }); - } - - diffs -} - -/// Returns a list of differences that make `sub` not a subtype of `sup` -/// -/// This function returns an empty list when `sub` is a subtype of `sup` -pub fn scalar_subtype_difference( - sub: &SqlScalarType, - sup: &SqlScalarType, -) -> Vec { - use SqlScalarType::*; - - let mut diffs = Vec::new(); - - match (sub, sup) { - ( - List { - element_type: sub_elt, - .. - }, - List { - element_type: sup_elt, - .. - }, - ) - | ( - Map { - value_type: sub_elt, - .. - }, - Map { - value_type: sup_elt, - .. - }, - ) - | ( - Range { - element_type: sub_elt, - .. - }, - Range { - element_type: sup_elt, - .. - }, - ) - | (Array(sub_elt), Array(sup_elt)) => { - let ctor = format!("{:?}", SqlScalarBaseType::from(sub)); - diffs.extend( - scalar_subtype_difference(sub_elt, sup_elt) - .into_iter() - .map(|diff| SqlColumnTypeDifference::ElementType { - ctor: ctor.clone(), - element_type: Box::new(diff), - }), - ); - } - ( - Record { - fields: sub_fields, .. - }, - Record { - fields: sup_fields, .. - }, - ) => { - let sub = sub_fields - .iter() - .map(|(sub_field, sub_ty)| (sub_field.clone(), sub_ty)) - .collect::>(); - - let mut missing = Vec::new(); - let mut field_diffs = Vec::new(); - for (sup_field, sup_ty) in sup_fields { - if let Some(sub_ty) = sub.get(sup_field) { - let diff = column_subtype_difference(sub_ty, sup_ty); - - if !diff.is_empty() { - field_diffs.push((sup_field.clone(), diff)); - } - } else { - missing.push(sup_field.clone()); - } - } - } - (_, _) => { - // TODO(mgree) confirm that we don't want to allow numeric subtyping - if SqlScalarBaseType::from(sub) != SqlScalarBaseType::from(sup) { - diffs.push(SqlColumnTypeDifference::NotSubtype { - sub: sub.clone(), - sup: sup.clone(), - }) - } - } - }; - - diffs -} - -/// Returns true when it is safe to treat a `sub` row as an `sup` row -/// -/// In particular, the core types must be equal, and if a column in `sup` is nullable, that column should also be nullable in `sub` -/// Conversely, it is okay to treat a known non-nullable column as nullable: `sub` may be nullable when `sup` is not -pub fn is_subtype_of(sub: &[SqlColumnType], sup: &[SqlColumnType]) -> bool { - if sub.len() != sup.len() { - return false; - } - - sub.iter().zip_eq(sup.iter()).all(|(got, known)| { - (!known.nullable || got.nullable) && got.scalar_type.base_eq(&known.scalar_type) - }) -} - -/// Check that the visible type of each query has not been changed -#[derive(Debug)] -pub struct Typecheck { - /// The known types of the queries so far - ctx: SharedContext, - /// Whether or not this is the first run of the transform - disallow_new_globals: bool, - /// Whether or not to be strict about join equivalences having the same nullability - strict_join_equivalences: bool, - /// Whether or not to disallow dummy values - disallow_dummy: bool, - /// Recursion guard for checked recursion - recursion_guard: RecursionGuard, -} - -impl CheckedRecursion for Typecheck { - fn recursion_guard(&self) -> &RecursionGuard { - &self.recursion_guard - } -} - -impl Typecheck { - /// Creates a typechecking consistency checking pass using a given shared context - pub fn new(ctx: SharedContext) -> Self { - Self { - ctx, - disallow_new_globals: false, - strict_join_equivalences: false, - disallow_dummy: false, - recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT), - } - } - - /// New non-transient global IDs will be treated as an error - /// - /// Only turn this on after the context has been appropriately populated by, e.g., an earlier run - pub fn disallow_new_globals(mut self) -> Self { - self.disallow_new_globals = true; - self - } - - /// Equivalence classes in joins must not only agree on scalar type, but also on nullability - /// - /// Only turn this on before `JoinImplementation` - pub fn strict_join_equivalences(mut self) -> Self { - self.strict_join_equivalences = true; - - self - } - - /// Disallow dummy values - pub fn disallow_dummy(mut self) -> Self { - self.disallow_dummy = true; - self - } - - /// Returns the type of a relation expression or a type error. - /// - /// This function is careful to check validity, not just find out the type. - /// - /// It should be linear in the size of the AST. - /// - /// ??? should we also compute keys and return a `SqlRelationType`? - /// ggevay: Checking keys would have the same problem as checking nullability: key inference - /// is very heuristic (even more so than nullability inference), so it's almost impossible to - /// reliably keep it stable across transformations. - pub fn typecheck<'a>( - &self, - expr: &'a MirRelationExpr, - ctx: &Context, - ) -> Result, TypeError<'a>> { - use MirRelationExpr::*; - - self.checked_recur(|tc| match expr { - Constant { typ, rows } => { - if let Ok(rows) = rows { - for (row, _id) in rows { - let datums = row.unpack(); - - // correct length - if datums.len() != typ.column_types.len() { - return Err(TypeError::BadConstantRow { - source: expr, - got: row.clone(), - expected: typ.column_types.clone(), - }); - } - - // correct types - if datums - .iter() - .zip_eq(typ.column_types.iter()) - .any(|(d, ty)| d != &mz_repr::Datum::Dummy && !d.is_instance_of_sql(ty)) - { - return Err(TypeError::BadConstantRow { - source: expr, - got: row.clone(), - expected: typ.column_types.clone(), - }); - } - - if self.disallow_dummy && datums.iter().any(|d| d == &mz_repr::Datum::Dummy) { - return Err(TypeError::DisallowedDummy { - source: expr, - }); - } - } - } - - Ok(typ.column_types.clone()) - } - Get { typ, id, .. } => { - if let Id::Global(_global_id) = id { - if !ctx.contains_key(id) { - // TODO(mgree) pass QueryContext through to check these types - return Ok(typ.column_types.clone()); - } - } - - let ctx_typ = ctx.get(id).ok_or_else(|| TypeError::Unbound { - source: expr, - id: id.clone(), - typ: typ.clone(), - })?; - - // covariant: the ascribed type must be a subtype of the actual type in the context - let diffs = relation_subtype_difference(&typ.column_types, ctx_typ).into_iter().flat_map(|diff| diff.ignore_nullability()).collect::>(); - - if !diffs.is_empty() { - return Err(TypeError::MismatchColumns { - source: expr, - got: typ.column_types.clone(), - expected: ctx_typ.clone(), - diffs, - message: "annotation did not match context type".into(), - }); - } - - Ok(typ.column_types.clone()) - } - Project { input, outputs } => { - let t_in = tc.typecheck(input, ctx)?; - - for x in outputs { - if *x >= t_in.len() { - return Err(TypeError::BadProject { - source: expr, - got: outputs.clone(), - input_type: t_in, - }); - } - } - - Ok(outputs.iter().map(|col| t_in[*col].clone()).collect()) - } - Map { input, scalars } => { - let mut t_in = tc.typecheck(input, ctx)?; - - for scalar_expr in scalars.iter() { - t_in.push(tc.typecheck_scalar(scalar_expr, expr, &t_in)?); - - if self.disallow_dummy && scalar_expr.contains_dummy() { - return Err(TypeError::DisallowedDummy { - source: expr, - }); - } - } - - Ok(t_in) - } - FlatMap { input, func, exprs } => { - let mut t_in = tc.typecheck(input, ctx)?; - - let mut t_exprs = Vec::with_capacity(exprs.len()); - for scalar_expr in exprs { - t_exprs.push(tc.typecheck_scalar(scalar_expr, expr, &t_in)?); - - if self.disallow_dummy && scalar_expr.contains_dummy() { - return Err(TypeError::DisallowedDummy { - source: expr, - }); - } - } - // TODO(mgree) check t_exprs agrees with `func`'s input type - - let t_out = func.output_type().column_types; - - // FlatMap extends the existing columns - t_in.extend(t_out); - Ok(t_in) - } - Filter { input, predicates } => { - let mut t_in = tc.typecheck(input, ctx)?; - - // Set as nonnull any columns where null values would cause - // any predicate to evaluate to null. - for column in non_nullable_columns(predicates) { - t_in[column].nullable = false; - } - - for scalar_expr in predicates { - let t = tc.typecheck_scalar(scalar_expr, expr, &t_in)?; - - // filter condition must be boolean - // ignoring nullability: null is treated as false - // NB this behavior is slightly different from columns_match (for which we would set nullable to false in the expected type) - if t.scalar_type != SqlScalarType::Bool { - let sub = t.scalar_type.clone(); - - return Err(TypeError::MismatchColumn { - source: expr, - got: t, - expected: SqlColumnType { - scalar_type: SqlScalarType::Bool, - nullable: true, - }, - diffs: vec![SqlColumnTypeDifference::NotSubtype { sub, sup: SqlScalarType::Bool }], - message: "expected boolean condition".into(), - }); - } - - if self.disallow_dummy && scalar_expr.contains_dummy() { - return Err(TypeError::DisallowedDummy { - source: expr, - }); - } - } - - Ok(t_in) - } - Join { - inputs, - equivalences, - implementation, - } => { - let mut t_in_global = Vec::new(); - let mut t_in_local = vec![Vec::new(); inputs.len()]; - - for (i, input) in inputs.iter().enumerate() { - let input_t = tc.typecheck(input, ctx)?; - t_in_global.extend(input_t.clone()); - t_in_local[i] = input_t; - } - - for eq_class in equivalences { - let mut t_exprs: Vec = Vec::with_capacity(eq_class.len()); - - let mut all_nullable = true; - - for scalar_expr in eq_class { - // Note: the equivalences have global column references - let t_expr = tc.typecheck_scalar(scalar_expr, expr, &t_in_global)?; - - if !t_expr.nullable { - all_nullable = false; - } - - if let Some(t_first) = t_exprs.get(0) { - let diffs = scalar_subtype_difference(&t_expr.scalar_type, &t_first.scalar_type); - if !diffs.is_empty() { - return Err(TypeError::MismatchColumn { - source: expr, - got: t_expr, - expected: t_first.clone(), - diffs, - message: "equivalence class members have different scalar types".into(), - }); - } - - // equivalences may or may not match on nullability - // before JoinImplementation runs, nullability should match. - // but afterwards, some nulls may appear that are actually being filtered out elsewhere - if self.strict_join_equivalences { - if t_expr.nullable != t_first.nullable { - let sub = t_expr.clone(); - let sup = t_first.clone(); - - let err = TypeError::MismatchColumn { - source: expr, - got: t_expr.clone(), - expected: t_first.clone(), - diffs: vec![SqlColumnTypeDifference::Nullability { sub, sup }], - message: "equivalence class members have different nullability (and join equivalence checking is strict)".to_string(), - }; - - // TODO(mgree) this imprecision should be resolved, but we need to fix the optimizer - ::tracing::debug!("{err}"); - } - } - } - - if self.disallow_dummy && scalar_expr.contains_dummy() { - return Err(TypeError::DisallowedDummy { - source: expr, - }); - } - - t_exprs.push(t_expr); - } - - if self.strict_join_equivalences && all_nullable { - let err = TypeError::BadJoinEquivalence { - source: expr, - got: t_exprs, - message: "all expressions were nullable (and join equivalence checking is strict)".to_string(), - }; - - // TODO(mgree) this imprecision should be resolved, but we need to fix the optimizer - ::tracing::debug!("{err}"); - } - } - - // check that the join implementation is consistent - match implementation { - JoinImplementation::Differential((start_idx, first_key, _), others) => { - if let Some(key) = first_key { - for k in key { - let _ = tc.typecheck_scalar(k, expr, &t_in_local[*start_idx])?; - } - } - - for (idx, key, _) in others { - for k in key { - let _ = tc.typecheck_scalar(k, expr, &t_in_local[*idx])?; - } - } - } - JoinImplementation::DeltaQuery(plans) => { - for plan in plans { - for (idx, key, _) in plan { - for k in key { - let _ = tc.typecheck_scalar(k, expr, &t_in_local[*idx])?; - } - } - } - } - JoinImplementation::IndexedFilter(_coll_id, _idx_id, key, consts) => { - let typ: Vec = key - .iter() - .map(|k| tc.typecheck_scalar(k, expr, &t_in_global)) - .collect::, TypeError>>()?; - - for row in consts { - let datums = row.unpack(); - - // correct length - if datums.len() != typ.len() { - return Err(TypeError::BadConstantRow { - source: expr, - got: row.clone(), - expected: typ, - }); - } - - // correct types - if datums - .iter() - .zip_eq(typ.iter()) - .any(|(d, ty)| d != &mz_repr::Datum::Dummy && !d.is_instance_of_sql(ty)) - { - return Err(TypeError::BadConstantRow { - source: expr, - got: row.clone(), - expected: typ, - }); - } - } - } - JoinImplementation::Unimplemented => (), - } - - Ok(t_in_global) - } - Reduce { - input, - group_key, - aggregates, - monotonic: _, - expected_group_size: _, - } => { - let t_in = tc.typecheck(input, ctx)?; - - let mut t_out = group_key - .iter() - .map(|scalar_expr| tc.typecheck_scalar(scalar_expr, expr, &t_in)) - .collect::, _>>()?; - - if self.disallow_dummy && group_key.iter().any(|scalar_expr| scalar_expr.contains_dummy()) { - return Err(TypeError::DisallowedDummy { - source: expr, - }); - } - - for agg in aggregates { - t_out.push(tc.typecheck_aggregate(agg, expr, &t_in)?); - } - - Ok(t_out) - } - TopK { - input, - group_key, - order_key, - limit: _, - offset: _, - monotonic: _, - expected_group_size: _, - } => { - let t_in = tc.typecheck(input, ctx)?; - - for &k in group_key { - if k >= t_in.len() { - return Err(TypeError::BadTopKGroupKey { - source: expr, - k, - input_type: t_in, - }); - } - } - - for order in order_key { - if order.column >= t_in.len() { - return Err(TypeError::BadTopKOrdering { - source: expr, - order: order.clone(), - input_type: t_in, - }); - } - } - - Ok(t_in) - } - Negate { input } => tc.typecheck(input, ctx), - Threshold { input } => tc.typecheck(input, ctx), - Union { base, inputs } => { - let mut t_base = tc.typecheck(base, ctx)?; - - for input in inputs { - let t_input = tc.typecheck(input, ctx)?; - - let len_sub = t_base.len(); - let len_sup = t_input.len(); - if len_sub != len_sup { - return Err(TypeError::MismatchColumns { - source: expr, - got: t_base.clone(), - expected: t_input, - diffs: vec![SqlRelationTypeDifference::Length { - len_sub, - len_sup, - }], - message: "union branches have different numbers of columns".into(), - }); - } - - for (base_col, input_col) in t_base.iter_mut().zip_eq(t_input) { - *base_col = - base_col - .union(&input_col) - .map_err(|e| { - let base_col = base_col.clone(); - let diffs = column_subtype_difference(&base_col, &input_col); - - TypeError::MismatchColumn { - source: expr, - got: input_col, - expected: base_col, - diffs, - message: format!( - "couldn't compute union of column types in union: {e}" - ), - } - })?; - } - } - - Ok(t_base) - } - Let { id, value, body } => { - let t_value = tc.typecheck(value, ctx)?; - - let binding = Id::Local(*id); - if ctx.contains_key(&binding) { - return Err(TypeError::Shadowing { - source: expr, - id: binding, - }); - } - - let mut body_ctx = ctx.clone(); - body_ctx.insert(Id::Local(*id), t_value); - - tc.typecheck(body, &body_ctx) - } - LetRec { ids, values, body, limits: _ } => { - if ids.len() != values.len() { - return Err(TypeError::BadLetRecBindings { source: expr }); - } - - // temporary hack: steal info from the Gets inside to learn the expected types - // if no get occurs in any definition or the body, that means that relation is dead code (which is okay) - let mut ctx = ctx.clone(); - // calling tc.collect_recursive_variable_types(expr, ...) triggers a panic due to nested letrecs with shadowing IDs - for inner_expr in values.iter().chain(std::iter::once(body.as_ref())) { - tc.collect_recursive_variable_types(inner_expr, ids, &mut ctx)?; - } - - for (id, value) in ids.iter().zip_eq(values.iter()) { - let typ = tc.typecheck(value, &ctx)?; - - let id = Id::Local(id.clone()); - if let Some(ctx_typ) = ctx.get_mut(&id) { - for (base_col, input_col) in ctx_typ.iter_mut().zip_eq(typ) { - *base_col = base_col.union(&input_col).map_err(|e| { - let base_col = base_col.clone(); - let diffs = column_subtype_difference(&base_col, &input_col); - - TypeError::MismatchColumn { - source: expr, - got: input_col, - expected: base_col, - diffs, - message: format!( - "couldn't compute union of column types in let rec: {e}" - ), - } - })?; - } - } else { - // dead code: no `Get` references this relation anywhere. we record the type anyway - ctx.insert(id, typ); - } - } - - tc.typecheck(body, &ctx) - } - ArrangeBy { input, keys } => { - let t_in = tc.typecheck(input, ctx)?; - - for key in keys { - for k in key { - let _ = tc.typecheck_scalar(k, expr, &t_in)?; - } - } - - Ok(t_in) - } - }) - } - - /// Traverses a term to collect the types of given ids. - /// - /// LetRec doesn't have type info stored in it. Until we change the MIR to track that information explicitly, we have to rebuild it from looking at the term. - fn collect_recursive_variable_types<'a>( - &self, - expr: &'a MirRelationExpr, - ids: &[LocalId], - ctx: &mut Context, - ) -> Result<(), TypeError<'a>> { - use MirRelationExpr::*; - - self.checked_recur(|tc| { - match expr { - Get { - id: Id::Local(id), - typ, - .. - } => { - if !ids.contains(id) { - return Ok(()); - } - - let id = Id::Local(id.clone()); - if let Some(ctx_typ) = ctx.get_mut(&id) { - for (base_col, input_col) in - ctx_typ.iter_mut().zip_eq(typ.column_types.iter()) - { - *base_col = base_col.union(input_col).map_err(|e| { - let base_col = base_col.clone(); - let diffs = column_subtype_difference(&base_col, input_col); - - TypeError::MismatchColumn { - source: expr, - got: input_col.clone(), - expected: base_col, - diffs, - message: format!( - "couldn't compute union of collected column types: {}", - e - ), - } - })?; - } - } else { - ctx.insert(id, typ.column_types.clone()); - } - } - Get { - id: Id::Global(..), .. - } - | Constant { .. } => (), - Let { id, value, body } => { - tc.collect_recursive_variable_types(value, ids, ctx)?; - - // we've shadowed the id - if ids.contains(id) { - return Err(TypeError::Shadowing { - source: expr, - id: Id::Local(*id), - }); - } - - tc.collect_recursive_variable_types(body, ids, ctx)?; - } - LetRec { - ids: inner_ids, - values, - body, - limits: _, - } => { - for inner_id in inner_ids { - if ids.contains(inner_id) { - return Err(TypeError::Shadowing { - source: expr, - id: Id::Local(*inner_id), - }); - } - } - - for value in values { - tc.collect_recursive_variable_types(value, ids, ctx)?; - } - - tc.collect_recursive_variable_types(body, ids, ctx)?; - } - Project { input, .. } - | Map { input, .. } - | FlatMap { input, .. } - | Filter { input, .. } - | Reduce { input, .. } - | TopK { input, .. } - | Negate { input } - | Threshold { input } - | ArrangeBy { input, .. } => { - tc.collect_recursive_variable_types(input, ids, ctx)?; - } - Join { inputs, .. } => { - for input in inputs { - tc.collect_recursive_variable_types(input, ids, ctx)?; - } - } - Union { base, inputs } => { - tc.collect_recursive_variable_types(base, ids, ctx)?; - - for input in inputs { - tc.collect_recursive_variable_types(input, ids, ctx)?; - } - } - } - - Ok(()) - }) - } - - fn typecheck_scalar<'a>( - &self, - expr: &'a MirScalarExpr, - source: &'a MirRelationExpr, - column_types: &[SqlColumnType], - ) -> Result> { - use MirScalarExpr::*; - - self.checked_recur(|tc| match expr { - Column(i, _) => match column_types.get(*i) { - Some(ty) => Ok(ty.clone()), - None => Err(TypeError::NoSuchColumn { - source, - expr, - col: *i, - }), - }, - Literal(row, typ) => { - if let Ok(row) = row { - let datums = row.unpack(); - - if datums.len() != 1 - || (datums[0] != mz_repr::Datum::Dummy - && !datums[0].is_instance_of_sql(typ)) - { - return Err(TypeError::BadConstantRow { - source, - got: row.clone(), - expected: vec![typ.clone()], - }); - } - } - - Ok(typ.clone()) - } - CallUnmaterializable(func) => Ok(func.output_type()), - CallUnary { expr, func } => { - Ok(func.output_type(tc.typecheck_scalar(expr, source, column_types)?)) - } - CallBinary { expr1, expr2, func } => Ok(func.output_type( - tc.typecheck_scalar(expr1, source, column_types)?, - tc.typecheck_scalar(expr2, source, column_types)?, - )), - CallVariadic { exprs, func } => Ok(func.output_type( - exprs - .iter() - .map(|e| tc.typecheck_scalar(e, source, column_types)) - .collect::, TypeError>>()?, - )), - If { cond, then, els } => { - let cond_type = tc.typecheck_scalar(cond, source, column_types)?; - - // condition must be boolean - // ignoring nullability: null is treated as false - // NB this behavior is slightly different from columns_match (for which we would set nullable to false in the expected type) - if cond_type.scalar_type != SqlScalarType::Bool { - let sub = cond_type.scalar_type.clone(); - - return Err(TypeError::MismatchColumn { - source, - got: cond_type, - expected: SqlColumnType { - scalar_type: SqlScalarType::Bool, - nullable: true, - }, - diffs: vec![SqlColumnTypeDifference::NotSubtype { - sub, - sup: SqlScalarType::Bool, - }], - message: "expected boolean condition".into(), - }); - } - - let then_type = tc.typecheck_scalar(then, source, column_types)?; - let else_type = tc.typecheck_scalar(els, source, column_types)?; - then_type.union(&else_type).map_err(|e| { - let diffs = column_subtype_difference(&then_type, &else_type); - - TypeError::MismatchColumn { - source, - got: then_type, - expected: else_type, - diffs, - message: format!("couldn't compute union of column types for if: {e}"), - } - }) - } - }) - } - - /// Typecheck an `AggregateExpr` - pub fn typecheck_aggregate<'a>( - &self, - expr: &'a AggregateExpr, - source: &'a MirRelationExpr, - column_types: &[SqlColumnType], - ) -> Result> { - self.checked_recur(|tc| { - let t_in = tc.typecheck_scalar(&expr.expr, source, column_types)?; - - // TODO check that t_in is actually acceptable for `func` - - Ok(expr.func.output_type(t_in)) - }) - } -} - -/// Detailed type error logging as a warning, with failures in CI and a logged error in production -/// -/// type_error(severity, ...) logs a type warning; if `severity` is `true`, it will also log an error (visible in Sentry) -macro_rules! type_error { - ($severity:expr, $($arg:tt)+) => {{ - if $severity { - soft_panic_or_log!($($arg)+); - } else { - ::tracing::debug!($($arg)+); - } - }} -} - -impl crate::Transform for Typecheck { - fn name(&self) -> &'static str { - "Typecheck" - } - - fn actually_perform_transform( - &self, - relation: &mut MirRelationExpr, - transform_ctx: &mut crate::TransformCtx, - ) -> Result<(), crate::TransformError> { - let mut typecheck_ctx = self.ctx.lock().expect("typecheck ctx"); - - let expected = transform_ctx - .global_id - .map_or_else(|| None, |id| typecheck_ctx.get(&Id::Global(id))); - - if let Some(id) = transform_ctx.global_id { - if self.disallow_new_globals - && expected.is_none() - && transform_ctx.global_id.is_some() - && !id.is_transient() - { - type_error!( - false, // not severe - "type warning: new non-transient global id {id}\n{}", - relation.pretty() - ); - } - } - - let got = self.typecheck(relation, &typecheck_ctx); - - let humanizer = mz_repr::explain::DummyHumanizer; - - match (got, expected) { - (Ok(got), Some(expected)) => { - let id = transform_ctx.global_id.unwrap(); - - // contravariant: global types can be updated - let diffs = relation_subtype_difference(expected, &got); - if !diffs.is_empty() { - // SEVERE only if got and expected have true differences, not just nullability - let severity = diffs - .iter() - .any(|diff| diff.clone().ignore_nullability().is_some()); - - let err = TypeError::MismatchColumns { - source: relation, - got, - expected: expected.clone(), - diffs, - message: format!( - "a global id {id}'s type changed (was `expected` which should be a subtype of `got`) " - ), - }; - - type_error!(severity, "type error in known global id {id}:\n{err}"); - } - } - (Ok(got), None) => { - if let Some(id) = transform_ctx.global_id { - typecheck_ctx.insert(Id::Global(id), got); - } - } - (Err(err), _) => { - let (expected, binding) = match expected { - Some(expected) => { - let id = transform_ctx.global_id.unwrap(); - ( - format!("expected type {}\n", columns_pretty(expected, &humanizer)), - format!("known global id {id}"), - ) - } - None => ("".to_string(), "transient query".to_string()), - }; - - type_error!( - true, // SEVERE: the transformed code is inconsistent - "type error in {binding}:\n{err}\n{expected}{}", - relation.pretty() - ); - } - } - - Ok(()) - } -} - -/// Prints a type prettily with a given `ExprHumanizer` -pub fn columns_pretty(cols: &[SqlColumnType], humanizer: &H) -> String -where - H: ExprHumanizer, -{ - let mut s = String::with_capacity(2 + 3 * cols.len()); - - s.push('('); - - let mut it = cols.iter().peekable(); - while let Some(col) = it.next() { - s.push_str(&humanizer.humanize_column_type(col, false)); - - if it.peek().is_some() { - s.push_str(", "); - } - } - - s.push(')'); - - s -} - -impl SqlRelationTypeDifference { - /// Pretty prints a type difference - /// - /// Always indents two spaces - pub fn humanize(&self, h: &H, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result - where - H: ExprHumanizer, - { - use SqlRelationTypeDifference::*; - match self { - Length { len_sub, len_sup } => { - writeln!( - f, - " number of columns do not match ({len_sub} != {len_sup})" - ) - } - Column { col, diff } => { - writeln!(f, " column {col} differs:")?; - diff.humanize(4, h, f) - } - } - } -} - -impl SqlColumnTypeDifference { - /// Pretty prints a type difference at a given indentation level - pub fn humanize( - &self, - indent: usize, - h: &H, - f: &mut std::fmt::Formatter<'_>, - ) -> std::fmt::Result - where - H: ExprHumanizer, - { - use SqlColumnTypeDifference::*; - - // indent - write!(f, "{:indent$}", "")?; - - match self { - NotSubtype { sub, sup } => { - let sub = h.humanize_scalar_type(sub, false); - let sup = h.humanize_scalar_type(sup, false); - - writeln!(f, "{sub} is a not a subtype of {sup}") - } - Nullability { sub, sup } => { - let sub = h.humanize_column_type(sub, false); - let sup = h.humanize_column_type(sup, false); - - writeln!(f, "{sub} is nullable but {sup} is not") - } - ElementType { ctor, element_type } => { - writeln!(f, "{ctor} element types differ:")?; - - element_type.humanize(indent + 2, h, f) - } - RecordMissingFields { missing } => { - write!(f, "missing column fields:")?; - for col in missing { - write!(f, " {col}")?; - } - f.write_char('\n') - } - RecordFields { fields } => { - writeln!(f, "{} record fields differ:", fields.len())?; - - for (col, diff) in fields { - writeln!(f, "{:indent$} field '{col}':", "")?; - diff.humanize(indent + 4, h, f)?; - } - Ok(()) - } - } - } -} - -/// Wrapper struct for a `Display` instance for `TypeError`s with a given `ExprHumanizer` -#[allow(missing_debug_implementations)] -pub struct TypeErrorHumanizer<'a, 'b, H> -where - H: ExprHumanizer, -{ - err: &'a TypeError<'a>, - humanizer: &'b H, -} - -impl<'a, 'b, H> TypeErrorHumanizer<'a, 'b, H> -where - H: ExprHumanizer, -{ - /// Create a `Display`-shim struct for a given `TypeError`/`ExprHumanizer` pair - pub fn new(err: &'a TypeError, humanizer: &'b H) -> Self { - Self { err, humanizer } - } -} - -impl<'a, 'b, H> std::fmt::Display for TypeErrorHumanizer<'a, 'b, H> -where - H: ExprHumanizer, -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.err.humanize(self.humanizer, f) - } -} - -impl<'a> std::fmt::Display for TypeError<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - TypeErrorHumanizer { - err: self, - humanizer: &DummyHumanizer, - } - .fmt(f) - } -} - -impl<'a> TypeError<'a> { - /// The source of the type error - pub fn source(&self) -> Option<&'a MirRelationExpr> { - use TypeError::*; - match self { - Unbound { source, .. } - | NoSuchColumn { source, .. } - | MismatchColumn { source, .. } - | MismatchColumns { source, .. } - | BadConstantRow { source, .. } - | BadProject { source, .. } - | BadJoinEquivalence { source, .. } - | BadTopKGroupKey { source, .. } - | BadTopKOrdering { source, .. } - | BadLetRecBindings { source } - | Shadowing { source, .. } - | DisallowedDummy { source, .. } => Some(source), - Recursion { .. } => None, - } - } - - fn humanize(&self, humanizer: &H, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result - where - H: ExprHumanizer, - { - if let Some(source) = self.source() { - writeln!(f, "In the MIR term:\n{}\n", source.pretty())?; - } - - use TypeError::*; - match self { - Unbound { source: _, id, typ } => { - let typ = columns_pretty(&typ.column_types, humanizer); - writeln!(f, "{id} is unbound\ndeclared type {typ}")? - } - NoSuchColumn { - source: _, - expr, - col, - } => writeln!(f, "{expr} references non-existent column {col}")?, - MismatchColumn { - source: _, - got, - expected, - diffs, - message, - } => { - let got = humanizer.humanize_column_type(got, false); - let expected = humanizer.humanize_column_type(expected, false); - writeln!( - f, - "mismatched column types: {message}\n got {got}\nexpected {expected}" - )?; - - for diff in diffs { - diff.humanize(2, humanizer, f)?; - } - } - MismatchColumns { - source: _, - got, - expected, - diffs, - message, - } => { - let got = columns_pretty(got, humanizer); - let expected = columns_pretty(expected, humanizer); - - writeln!( - f, - "mismatched relation types: {message}\n got {got}\nexpected {expected}" - )?; - - for diff in diffs { - diff.humanize(humanizer, f)?; - } - } - BadConstantRow { - source: _, - got, - expected, - } => { - let expected = columns_pretty(expected, humanizer); - - writeln!( - f, - "bad constant row\n got {got}\nexpected row of type {expected}" - )? - } - BadProject { - source: _, - got, - input_type, - } => { - let input_type = columns_pretty(input_type, humanizer); - - writeln!( - f, - "projection of non-existant columns {got:?} from type {input_type}" - )? - } - BadJoinEquivalence { - source: _, - got, - message, - } => { - let got = columns_pretty(got, humanizer); - - writeln!(f, "bad join equivalence {got}: {message}")? - } - BadTopKGroupKey { - source: _, - k, - input_type, - } => { - let input_type = columns_pretty(input_type, humanizer); - - writeln!( - f, - "TopK group key component references invalid column {k} in columns: {input_type}" - )? - } - BadTopKOrdering { - source: _, - order, - input_type, - } => { - let col = order.column; - let num_cols = input_type.len(); - let are = if num_cols == 1 { "is" } else { "are" }; - let s = if num_cols == 1 { "" } else { "s" }; - let input_type = columns_pretty(input_type, humanizer); - - // TODO(cloud#8196) - let mode = HumanizedExplain::new(false); - let order = mode.expr(order, None); - - writeln!( - f, - "TopK ordering {order} references invalid column {col}\nthere {are} {num_cols} column{s}: {input_type}" - )? - } - BadLetRecBindings { source: _ } => { - writeln!(f, "LetRec ids and definitions don't line up")? - } - Shadowing { source: _, id } => writeln!(f, "id {id} is shadowed")?, - DisallowedDummy { source: _ } => writeln!(f, "contains a dummy value")?, - Recursion { error } => writeln!(f, "{error}")?, - } - - Ok(()) - } -} diff --git a/src/transform/tests/test_runner.rs b/src/transform/tests/test_runner.rs index 2cf048eaeeacf..c043148e57f57 100644 --- a/src/transform/tests/test_runner.rs +++ b/src/transform/tests/test_runner.rs @@ -34,7 +34,7 @@ mod tests { use mz_transform::dataflow::{ DataflowMetainfo, optimize_dataflow_demand_inner, optimize_dataflow_filters_inner, }; - use mz_transform::{Optimizer, Transform, TransformCtx, reprtypecheck, typecheck}; + use mz_transform::{Optimizer, Transform, TransformCtx, reprtypecheck}; use proc_macro2::TokenTree; use crate::explain::Explainable; @@ -50,12 +50,10 @@ mod tests { fn full_transform_list() -> Vec> { let features = OptimizerFeatures::default(); - let typecheck_ctx = typecheck::empty_context(); let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, - &typecheck_ctx, &repr_typecheck_ctx, &mut df_meta, None, @@ -186,12 +184,10 @@ mod tests { test_type: TestType, ) -> Result { let features = OptimizerFeatures::default(); - let typecheck_ctx = typecheck::empty_context(); let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, - &typecheck_ctx, &repr_typecheck_ctx, &mut df_meta, None, @@ -372,12 +368,10 @@ mod tests { let mut out = String::new(); if test_type == TestType::Opt { let features = OptimizerFeatures::default(); - let typecheck_ctx = typecheck::empty_context(); let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, - &typecheck_ctx, &repr_typecheck_ctx, &mut df_meta, None, @@ -415,12 +409,10 @@ mod tests { }; if test_type == TestType::Opt { let features = OptimizerFeatures::default(); - let typecheck_ctx = typecheck::empty_context(); let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, - &typecheck_ctx, &repr_typecheck_ctx, &mut df_meta, None, diff --git a/src/transform/tests/test_transforms.rs b/src/transform/tests/test_transforms.rs index 451a0a024b4db..f280c1bfc9e6a 100644 --- a/src/transform/tests/test_transforms.rs +++ b/src/transform/tests/test_transforms.rs @@ -266,12 +266,10 @@ fn apply_transform( features.enable_letrec_fixpoint_analysis = true; features.enable_dequadratic_eqprop_map = true; features.enable_eq_classes_withholding_errors = true; - let typecheck_ctx = mz_transform::typecheck::empty_context(); let repr_typecheck_ctx = mz_transform::reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = mz_transform::TransformCtx::local( &features, - &typecheck_ctx, &repr_typecheck_ctx, &mut df_meta, None,