diff --git a/datafusion-examples/examples/builtin_functions/function_factory.rs b/datafusion-examples/examples/builtin_functions/function_factory.rs index 7eff0d0b5c484..106c53cdf7f12 100644 --- a/datafusion-examples/examples/builtin_functions/function_factory.rs +++ b/datafusion-examples/examples/builtin_functions/function_factory.rs @@ -24,7 +24,7 @@ use datafusion::error::Result; use datafusion::execution::context::{ FunctionFactory, RegisterFunction, SessionContext, SessionState, }; -use datafusion::logical_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion::logical_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion::logical_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion::logical_expr::{ ColumnarValue, CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, @@ -145,7 +145,7 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { let replacement = Self::replacement(&self.expr, &args)?; diff --git a/datafusion-examples/examples/query_planning/expr_api.rs b/datafusion-examples/examples/query_planning/expr_api.rs index 47de669023f7c..386273c72817b 100644 --- a/datafusion-examples/examples/query_planning/expr_api.rs +++ b/datafusion-examples/examples/query_planning/expr_api.rs @@ -175,8 +175,9 @@ fn simplify_demo() -> Result<()> { // the ExecutionProps carries information needed to simplify // expressions, such as the current time (to evaluate `now()` // correctly) - let props = ExecutionProps::new(); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::default() + .with_schema(schema) + .with_current_time(); let simplifier = ExprSimplifier::new(context); // And then call the simplify_expr function: @@ -191,7 +192,9 @@ fn simplify_demo() -> Result<()> { // here are some other examples of what DataFusion is capable of let schema = Schema::new(vec![make_field("i", DataType::Int64)]).to_dfschema_ref()?; - let context = SimplifyContext::new(&props).with_schema(schema.clone()); + let context = SimplifyContext::default() + .with_schema(Arc::clone(&schema)) + .with_current_time(); let simplifier = ExprSimplifier::new(context); // basic arithmetic simplification @@ -551,7 +554,9 @@ fn type_coercion_demo() -> Result<()> { assert!(physical_expr.evaluate(&batch).is_ok()); // 2. Type coercion with `ExprSimplifier::coerce`. - let context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema.clone())); + let context = SimplifyContext::default() + .with_schema(Arc::new(df_schema.clone())) + .with_current_time(); let simplifier = ExprSimplifier::new(context); let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?; let physical_expr = datafusion::physical_expr::create_physical_expr( diff --git a/datafusion-examples/examples/udf/advanced_udaf.rs b/datafusion-examples/examples/udf/advanced_udaf.rs index fbb9e652486ce..16d9a9f7cbdf7 100644 --- a/datafusion-examples/examples/udf/advanced_udaf.rs +++ b/datafusion-examples/examples/udf/advanced_udaf.rs @@ -34,7 +34,7 @@ use datafusion::logical_expr::{ Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, expr::AggregateFunction, function::{AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs}, - simplify::SimplifyInfo, + simplify::SimplifyContext, }; use datafusion::prelude::*; @@ -421,7 +421,7 @@ impl AggregateUDFImpl for SimplifiedGeoMeanUdaf { /// Optionally replaces a UDAF with another expression during query optimization. fn simplify(&self) -> Option { - let simplify = |aggregate_function: AggregateFunction, _: &dyn SimplifyInfo| { + let simplify = |aggregate_function: AggregateFunction, _: &SimplifyContext| { // Replaces the UDAF with `GeoMeanUdaf` as a placeholder example to demonstrate the `simplify` method. // In real-world scenarios, you might create UDFs from built-in expressions. Ok(Expr::AggregateFunction(AggregateFunction::new_udf( diff --git a/datafusion-examples/examples/udf/advanced_udwf.rs b/datafusion-examples/examples/udf/advanced_udwf.rs index e8d3a75b29dec..ffe06eac8f6a6 100644 --- a/datafusion-examples/examples/udf/advanced_udwf.rs +++ b/datafusion-examples/examples/udf/advanced_udwf.rs @@ -32,7 +32,7 @@ use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion::logical_expr::function::{ PartitionEvaluatorArgs, WindowFunctionSimplification, WindowUDFFieldArgs, }; -use datafusion::logical_expr::simplify::SimplifyInfo; +use datafusion::logical_expr::simplify::SimplifyContext; use datafusion::logical_expr::{ Expr, LimitEffect, PartitionEvaluator, Signature, WindowFrame, WindowFunctionDefinition, WindowUDF, WindowUDFImpl, @@ -198,7 +198,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { /// this function will simplify `SimplifySmoothItUdf` to `AggregateUDF` for `Avg` /// default implementation will not be called (left as `todo!()`) fn simplify(&self) -> Option { - let simplify = |window_function: WindowFunction, _: &dyn SimplifyInfo| { + let simplify = |window_function: WindowFunction, _: &SimplifyContext| { Ok(Expr::from(WindowFunction { fun: WindowFunctionDefinition::AggregateUDF(avg_udaf()), params: WindowFunctionParams { diff --git a/datafusion-examples/examples/udf/simple_udtf.rs b/datafusion-examples/examples/udf/simple_udtf.rs index 087b8ba73af5c..18eb8887a34a4 100644 --- a/datafusion-examples/examples/udf/simple_udtf.rs +++ b/datafusion-examples/examples/udf/simple_udtf.rs @@ -28,7 +28,6 @@ use datafusion::common::{ScalarValue, plan_err}; use datafusion::datasource::TableProvider; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::error::Result; -use datafusion::execution::context::ExecutionProps; use datafusion::logical_expr::simplify::SimplifyContext; use datafusion::logical_expr::{Expr, TableType}; use datafusion::optimizer::simplify_expressions::ExprSimplifier; @@ -142,8 +141,7 @@ impl TableFunctionImpl for LocalCsvTableFunc { .get(1) .map(|expr| { // try to simplify the expression, so 1+2 becomes 3, for example - let execution_props = ExecutionProps::new(); - let info = SimplifyContext::new(&execution_props); + let info = SimplifyContext::default(); let expr = ExprSimplifier::new(info).simplify(expr.clone())?; if let Expr::Literal(ScalarValue::Int64(Some(limit)), _) = expr { diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 55a031d870122..f67e7e4517d2b 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1134,6 +1134,12 @@ impl TryFrom for DFSchema { } } +impl From for SchemaRef { + fn from(dfschema: DFSchema) -> Self { + Arc::clone(&dfschema.inner) + } +} + // Hashing refers to a subset of fields considered in PartialEq. impl Hash for DFSchema { fn hash(&self, state: &mut H) { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index a769bb01b4354..5ca5c051ff220 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -93,9 +93,9 @@ use datafusion_expr::{ logical_plan::{DdlStatement, Statement}, planner::ExprPlanner, }; -use datafusion_optimizer::Analyzer; use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; use datafusion_optimizer::simplify_expressions::ExprSimplifier; +use datafusion_optimizer::{Analyzer, OptimizerContext}; use datafusion_optimizer::{AnalyzerRule, OptimizerRule}; use datafusion_session::SessionStore; @@ -749,12 +749,19 @@ impl SessionContext { ); } } - // Store the unoptimized plan into the session state. Although storing the - // optimized plan or the physical plan would be more efficient, doing so is - // not currently feasible. This is because `now()` would be optimized to a - // constant value, causing each EXECUTE to yield the same result, which is - // incorrect behavior. - self.state.write().store_prepared(name, fields, input)?; + // Optimize the plan without evaluating expressions like now() + let optimizer_context = OptimizerContext::new_with_config_options( + Arc::clone(self.state().config().options()), + ) + .without_query_execution_start_time(); + let plan = self.state().optimizer().optimize( + Arc::unwrap_or_clone(input), + &optimizer_context, + |_1, _2| {}, + )?; + self.state + .write() + .store_prepared(name, fields, Arc::new(plan))?; self.return_empty_dataframe() } LogicalPlan::Statement(Statement::Execute(execute)) => { @@ -1394,7 +1401,12 @@ impl SessionContext { })?; let state = self.state.read(); - let context = SimplifyContext::new(state.execution_props()); + let context = SimplifyContext::default() + .with_schema(Arc::clone(prepared.plan.schema())) + .with_config_options(Arc::clone(state.config_options())) + .with_query_execution_start_time( + state.execution_props().query_execution_start_time, + ); let simplifier = ExprSimplifier::new(context); // Only allow literals as parameters for now. diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 6a9ebcdf51250..7cdbc77ae90c3 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -57,10 +57,8 @@ use datafusion_expr::planner::ExprPlanner; #[cfg(feature = "sql")] use datafusion_expr::planner::{RelationPlanner, TypePlanner}; use datafusion_expr::registry::{FunctionRegistry, SerializerRegistry}; -use datafusion_expr::simplify::SimplifyInfo; -use datafusion_expr::{ - AggregateUDF, Explain, Expr, ExprSchemable, LogicalPlan, ScalarUDF, WindowUDF, -}; +use datafusion_expr::simplify::SimplifyContext; +use datafusion_expr::{AggregateUDF, Explain, Expr, LogicalPlan, ScalarUDF, WindowUDF}; use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_optimizer::{ Analyzer, AnalyzerRule, Optimizer, OptimizerConfig, OptimizerRule, @@ -744,13 +742,18 @@ impl SessionState { expr: Expr, df_schema: &DFSchema, ) -> datafusion_common::Result> { - let simplifier = - ExprSimplifier::new(SessionSimplifyProvider::new(self, df_schema)); + let config_options = self.config_options(); + let simplify_context = SimplifyContext::default() + .with_schema(Arc::new(df_schema.clone())) + .with_config_options(Arc::clone(config_options)) + .with_query_execution_start_time( + self.execution_props().query_execution_start_time, + ); + let simplifier = ExprSimplifier::new(simplify_context); // apply type coercion here to ensure types match let mut expr = simplifier.coerce(expr, df_schema)?; // rewrite Exprs to functions if necessary - let config_options = self.config_options(); for rewrite in self.analyzer.function_rewrites() { expr = expr .transform_up(|expr| rewrite.rewrite(expr, df_schema, config_options))? @@ -1834,9 +1837,12 @@ impl ContextProvider for SessionContextProvider<'_> { .get(name) .cloned() .ok_or_else(|| plan_datafusion_err!("table function '{name}' not found"))?; - let dummy_schema = DFSchema::empty(); - let simplifier = - ExprSimplifier::new(SessionSimplifyProvider::new(self.state, &dummy_schema)); + let simplify_context = SimplifyContext::default() + .with_config_options(Arc::clone(self.state.config_options())) + .with_query_execution_start_time( + self.state.execution_props().query_execution_start_time, + ); + let simplifier = ExprSimplifier::new(simplify_context); let args = args .into_iter() .map(|arg| simplifier.simplify(arg)) @@ -2063,7 +2069,7 @@ impl datafusion_execution::TaskContextProvider for SessionState { } impl OptimizerConfig for SessionState { - fn query_execution_start_time(&self) -> DateTime { + fn query_execution_start_time(&self) -> Option> { self.execution_props.query_execution_start_time } @@ -2115,35 +2121,6 @@ impl QueryPlanner for DefaultQueryPlanner { } } -struct SessionSimplifyProvider<'a> { - state: &'a SessionState, - df_schema: &'a DFSchema, -} - -impl<'a> SessionSimplifyProvider<'a> { - fn new(state: &'a SessionState, df_schema: &'a DFSchema) -> Self { - Self { state, df_schema } - } -} - -impl SimplifyInfo for SessionSimplifyProvider<'_> { - fn is_boolean_type(&self, expr: &Expr) -> datafusion_common::Result { - Ok(expr.get_type(self.df_schema)? == DataType::Boolean) - } - - fn nullable(&self, expr: &Expr) -> datafusion_common::Result { - expr.nullable(self.df_schema) - } - - fn execution_props(&self) -> &ExecutionProps { - self.state.execution_props() - } - - fn get_data_type(&self, expr: &Expr) -> datafusion_common::Result { - expr.get_type(self.df_schema) - } -} - #[derive(Debug)] pub(crate) struct PreparedPlan { /// Data types of the parameters diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 53684e51bc0ba..dba017f83ba1e 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -166,8 +166,7 @@ impl TestParquetFile { let df_schema = Arc::clone(&self.schema).to_dfschema_ref()?; // run coercion on the filters to coerce types etc. - let props = ExecutionProps::new(); - let context = SimplifyContext::new(&props).with_schema(Arc::clone(&df_schema)); + let context = SimplifyContext::default().with_schema(Arc::clone(&df_schema)); if let Some(filter) = maybe_filter { let simplifier = ExprSimplifier::new(context); let filter = simplifier.coerce(filter, &df_schema).unwrap(); diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index 90c1b96749b3c..91dd5de7fcd64 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -24,7 +24,6 @@ use arrow::util::pretty::{pretty_format_batches, pretty_format_columns}; use datafusion::prelude::*; use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::ExprFunctionExt; -use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr::NullTreatment; use datafusion_expr::simplify::SimplifyContext; use datafusion_functions::core::expr_ext::FieldAccessor; @@ -422,9 +421,7 @@ fn create_simplified_expr_test(expr: Expr, expected_expr: &str) { let df_schema = DFSchema::try_from(batch.schema()).unwrap(); // Simplify the expression first - let props = ExecutionProps::new(); - let simplify_context = - SimplifyContext::new(&props).with_schema(df_schema.clone().into()); + let simplify_context = SimplifyContext::default().with_schema(Arc::new(df_schema)); let simplifier = ExprSimplifier::new(simplify_context).with_max_cycles(10); let simplified = simplifier.simplify(expr).unwrap(); create_expr_test(simplified, expected_expr); diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index a42dfc951da0d..02f2503faf22a 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -23,16 +23,16 @@ use arrow::array::types::IntervalDayTime; use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use chrono::{DateTime, TimeZone, Utc}; -use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*}; +use datafusion::{error::Result, prelude::*}; use datafusion_common::ScalarValue; use datafusion_common::cast::as_int32_array; use datafusion_common::{DFSchemaRef, ToDFSchema}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::logical_plan::builder::table_scan_with_filters; -use datafusion_expr::simplify::SimplifyInfo; +use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{ - Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, ScalarUDF, - Volatility, table_scan, + Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, Projection, + ScalarUDF, Volatility, table_scan, }; use datafusion_functions::math; use datafusion_optimizer::optimizer::Optimizer; @@ -40,50 +40,6 @@ use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyExpress use datafusion_optimizer::{OptimizerContext, OptimizerRule}; use std::sync::Arc; -/// In order to simplify expressions, DataFusion must have information -/// about the expressions. -/// -/// You can provide that information using DataFusion [DFSchema] -/// objects or from some other implementation -struct MyInfo { - /// The input schema - schema: DFSchemaRef, - - /// Execution specific details needed for constant evaluation such - /// as the current time for `now()` and [VariableProviders] - execution_props: ExecutionProps, -} - -impl SimplifyInfo for MyInfo { - fn is_boolean_type(&self, expr: &Expr) -> Result { - Ok(matches!( - expr.get_type(self.schema.as_ref())?, - DataType::Boolean - )) - } - - fn nullable(&self, expr: &Expr) -> Result { - expr.nullable(self.schema.as_ref()) - } - - fn execution_props(&self) -> &ExecutionProps { - &self.execution_props - } - - fn get_data_type(&self, expr: &Expr) -> Result { - expr.get_type(self.schema.as_ref()) - } -} - -impl From for MyInfo { - fn from(schema: DFSchemaRef) -> Self { - Self { - schema, - execution_props: ExecutionProps::new(), - } - } -} - /// A schema like: /// /// a: Int32 (possibly with nulls) @@ -132,14 +88,10 @@ fn test_evaluate_with_start_time( expected_expr: Expr, date_time: &DateTime, ) { - let execution_props = - ExecutionProps::new().with_query_execution_start_time(*date_time); - - let info: MyInfo = MyInfo { - schema: schema(), - execution_props, - }; - let simplifier = ExprSimplifier::new(info); + let context = SimplifyContext::default() + .with_schema(schema()) + .with_query_execution_start_time(Some(*date_time)); + let simplifier = ExprSimplifier::new(context); let simplified_expr = simplifier .simplify(input_expr.clone()) .expect("successfully evaluated"); @@ -201,7 +153,9 @@ fn to_timestamp_expr(arg: impl Into) -> Expr { #[test] fn basic() { - let info: MyInfo = schema().into(); + let context = SimplifyContext::default() + .with_schema(schema()) + .with_query_execution_start_time(Some(Utc::now())); // The `Expr` is a core concept in DataFusion, and DataFusion can // help simplify it. @@ -210,21 +164,21 @@ fn basic() { // optimize form `a < 5` automatically let expr = col("a").lt(lit(2i32) + lit(3i32)); - let simplifier = ExprSimplifier::new(info); + let simplifier = ExprSimplifier::new(context); let simplified = simplifier.simplify(expr).unwrap(); assert_eq!(simplified, col("a").lt(lit(5i32))); } #[test] fn fold_and_simplify() { - let info: MyInfo = schema().into(); + let context = SimplifyContext::default().with_schema(schema()); // What will it do with the expression `concat('foo', 'bar') == 'foobar')`? let expr = concat(vec![lit("foo"), lit("bar")]).eq(lit("foobar")); // Since datafusion applies both simplification *and* rewriting // some expressions can be entirely simplified - let simplifier = ExprSimplifier::new(info); + let simplifier = ExprSimplifier::new(context); let simplified = simplifier.simplify(expr).unwrap(); assert_eq!(simplified, lit(true)) } @@ -523,6 +477,72 @@ fn multiple_now() -> Result<()> { Ok(()) } +/// Unwraps an alias expression to get the inner expression +fn unrwap_aliases(expr: &Expr) -> &Expr { + match expr { + Expr::Alias(alias) => unrwap_aliases(&alias.expr), + expr => expr, + } +} + +/// Test that `now()` is simplified to a literal when execution start time is set, +/// but remains as an expression when no execution start time is available. +#[test] +fn now_simplification_with_and_without_start_time() { + let plan = LogicalPlanBuilder::empty(false) + .project(vec![now()]) + .unwrap() + .build() + .unwrap(); + + // Case 1: With execution start time set, now() should be simplified to a literal + { + let time = DateTime::::from_timestamp_nanos(123); + let ctx: OptimizerContext = + OptimizerContext::new().with_query_execution_start_time(time); + let optimizer = SimplifyExpressions {}; + let simplified = optimizer + .rewrite(plan.clone(), &ctx) + .expect("rewrite should succeed") + .data; + let LogicalPlan::Projection(Projection { expr, .. }) = simplified else { + panic!("Expected Projection plan"); + }; + assert_eq!(expr.len(), 1); + let simplified = unrwap_aliases(expr.first().unwrap()); + // Should be a literal timestamp + match simplified { + Expr::Literal(ScalarValue::TimestampNanosecond(Some(ts), _), _) => { + assert_eq!(*ts, time.timestamp_nanos_opt().unwrap()); + } + other => panic!("Expected timestamp literal, got: {other:?}"), + } + } + + // Case 2: Without execution start time, now() should remain as a function call + { + let ctx: OptimizerContext = + OptimizerContext::new().without_query_execution_start_time(); + let optimizer = SimplifyExpressions {}; + let simplified = optimizer + .rewrite(plan, &ctx) + .expect("rewrite should succeed") + .data; + let LogicalPlan::Projection(Projection { expr, .. }) = simplified else { + panic!("Expected Projection plan"); + }; + assert_eq!(expr.len(), 1); + let simplified = unrwap_aliases(expr.first().unwrap()); + // Should still be a now() function call + match simplified { + Expr::ScalarFunction(ScalarFunction { func, .. }) => { + assert_eq!(func.name(), "now"); + } + other => panic!("Expected now() function call, got: {other:?}"), + } + } +} + // ------------------------------ // --- Simplifier tests ----- // ------------------------------ @@ -545,11 +565,8 @@ fn expr_test_schema() -> DFSchemaRef { } fn test_simplify(input_expr: Expr, expected_expr: Expr) { - let info: MyInfo = MyInfo { - schema: expr_test_schema(), - execution_props: ExecutionProps::new(), - }; - let simplifier = ExprSimplifier::new(info); + let context = SimplifyContext::default().with_schema(expr_test_schema()); + let simplifier = ExprSimplifier::new(context); let simplified_expr = simplifier .simplify(input_expr.clone()) .expect("successfully evaluated"); @@ -564,11 +581,10 @@ fn test_simplify_with_cycle_count( expected_expr: Expr, expected_count: u32, ) { - let info: MyInfo = MyInfo { - schema: expr_test_schema(), - execution_props: ExecutionProps::new(), - }; - let simplifier = ExprSimplifier::new(info); + let context = SimplifyContext::default() + .with_schema(expr_test_schema()) + .with_query_execution_start_time(Some(Utc::now())); + let simplifier = ExprSimplifier::new(context); let (simplified_expr, count) = simplifier .simplify_with_cycle_count_transformed(input_expr.clone()) .expect("successfully evaluated"); diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index b86cd94a8a9b7..24cade1e80d5a 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -42,7 +42,7 @@ use datafusion_common::{ assert_batches_sorted_eq, assert_contains, exec_datafusion_err, exec_err, not_impl_err, plan_err, }; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, LogicalPlanBuilder, OperateFunctionArg, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, @@ -699,7 +699,7 @@ impl ScalarUDFImpl for CastToI64UDF { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { // DataFusion should have ensured the function is called with just a // single argument @@ -975,7 +975,7 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { let replacement = Self::replacement(&self.expr, &args, &self.defaults)?; diff --git a/datafusion/expr/src/execution_props.rs b/datafusion/expr/src/execution_props.rs index 20d8f82bf48a2..3bf6978eb60ee 100644 --- a/datafusion/expr/src/execution_props.rs +++ b/datafusion/expr/src/execution_props.rs @@ -16,7 +16,7 @@ // under the License. use crate::var_provider::{VarProvider, VarType}; -use chrono::{DateTime, TimeZone, Utc}; +use chrono::{DateTime, Utc}; use datafusion_common::HashMap; use datafusion_common::alias::AliasGenerator; use datafusion_common::config::ConfigOptions; @@ -33,7 +33,9 @@ use std::sync::Arc; /// done so during predicate pruning and expression simplification #[derive(Clone, Debug)] pub struct ExecutionProps { - pub query_execution_start_time: DateTime, + /// The time at which the query execution started. If `None`, + /// functions like `now()` will not be simplified during optimization. + pub query_execution_start_time: Option>, /// Alias generator used by subquery optimizer rules pub alias_generator: Arc, /// Snapshot of config options when the query started @@ -52,9 +54,7 @@ impl ExecutionProps { /// Creates a new execution props pub fn new() -> Self { ExecutionProps { - // Set this to a fixed sentinel to make it obvious if this is - // not being updated / propagated correctly - query_execution_start_time: Utc.timestamp_nanos(0), + query_execution_start_time: None, alias_generator: Arc::new(AliasGenerator::new()), config_options: None, var_providers: None, @@ -66,7 +66,7 @@ impl ExecutionProps { mut self, query_execution_start_time: DateTime, ) -> Self { - self.query_execution_start_time = query_execution_start_time; + self.query_execution_start_time = Some(query_execution_start_time); self } @@ -79,7 +79,7 @@ impl ExecutionProps { /// Marks the execution of query started timestamp. /// This also instantiates a new alias generator. pub fn mark_start_execution(&mut self, config_options: Arc) -> &Self { - self.query_execution_start_time = Utc::now(); + self.query_execution_start_time = Some(Utc::now()); self.alias_generator = Arc::new(AliasGenerator::new()); self.config_options = Some(config_options); &*self @@ -126,7 +126,7 @@ mod test { fn debug() { let props = ExecutionProps::new(); assert_eq!( - "ExecutionProps { query_execution_start_time: 1970-01-01T00:00:00Z, alias_generator: AliasGenerator { next_id: 1 }, config_options: None, var_providers: None }", + "ExecutionProps { query_execution_start_time: None, alias_generator: AliasGenerator { next_id: 1 }, config_options: None, var_providers: None }", format!("{props:?}") ); } diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index e0235d32292fa..68d2c9073241b 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -67,25 +67,25 @@ pub type StateTypeFunction = /// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure /// A closure with two arguments: /// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyInfo] +/// * 'info': [crate::simplify::SimplifyContext] /// /// Closure returns simplified [Expr] or an error. pub type AggregateFunctionSimplification = Box< dyn Fn( crate::expr::AggregateFunction, - &dyn crate::simplify::SimplifyInfo, + &crate::simplify::SimplifyContext, ) -> Result, >; /// [crate::udwf::WindowUDFImpl::simplify] simplifier closure /// A closure with two arguments: /// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyInfo] +/// * 'info': [crate::simplify::SimplifyContext] /// /// Closure returns simplified [Expr] or an error. pub type WindowFunctionSimplification = Box< dyn Fn( crate::expr::WindowFunction, - &dyn crate::simplify::SimplifyInfo, + &crate::simplify::SimplifyContext, ) -> Result, >; diff --git a/datafusion/expr/src/simplify.rs b/datafusion/expr/src/simplify.rs index bbe65904fb775..8c68067a55a37 100644 --- a/datafusion/expr/src/simplify.rs +++ b/datafusion/expr/src/simplify.rs @@ -15,92 +15,98 @@ // specific language governing permissions and limitations // under the License. -//! Structs and traits to provide the information needed for expression simplification. +//! Structs to provide the information needed for expression simplification. + +use std::sync::Arc; use arrow::datatypes::DataType; -use datafusion_common::{DFSchemaRef, Result, internal_datafusion_err}; +use chrono::{DateTime, Utc}; +use datafusion_common::config::ConfigOptions; +use datafusion_common::{DFSchema, DFSchemaRef, Result}; -use crate::{Expr, ExprSchemable, execution_props::ExecutionProps}; +use crate::{Expr, ExprSchemable}; -/// Provides the information necessary to apply algebraic simplification to an -/// [Expr]. See [SimplifyContext] for one concrete implementation. -/// -/// This trait exists so that other systems can plug schema -/// information in without having to create `DFSchema` objects. If you -/// have a [`DFSchemaRef`] you can use [`SimplifyContext`] -pub trait SimplifyInfo { - /// Returns true if this Expr has boolean type - fn is_boolean_type(&self, expr: &Expr) -> Result; - - /// Returns true of this expr is nullable (could possibly be NULL) - fn nullable(&self, expr: &Expr) -> Result; - - /// Returns details needed for partial expression evaluation - fn execution_props(&self) -> &ExecutionProps; - - /// Returns data type of this expr needed for determining optimized int type of a value - fn get_data_type(&self, expr: &Expr) -> Result; -} - -/// Provides simplification information based on DFSchema and -/// [`ExecutionProps`]. This is the default implementation used by DataFusion +/// Provides simplification information based on schema, query execution time, +/// and configuration options. /// /// # Example /// See the `simplify_demo` in the [`expr_api` example] /// /// [`expr_api` example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/expr_api.rs #[derive(Debug, Clone)] -pub struct SimplifyContext<'a> { - schema: Option, - props: &'a ExecutionProps, +pub struct SimplifyContext { + schema: DFSchemaRef, + query_execution_start_time: Option>, + config_options: Arc, } -impl<'a> SimplifyContext<'a> { - /// Create a new SimplifyContext - pub fn new(props: &'a ExecutionProps) -> Self { +impl Default for SimplifyContext { + fn default() -> Self { Self { - schema: None, - props, + schema: Arc::new(DFSchema::empty()), + query_execution_start_time: None, + config_options: Arc::new(ConfigOptions::default()), } } +} + +impl SimplifyContext { + /// Set the [`ConfigOptions`] for this context + pub fn with_config_options(mut self, config_options: Arc) -> Self { + self.config_options = config_options; + self + } - /// Register a [`DFSchemaRef`] with this context + /// Set the schema for this context pub fn with_schema(mut self, schema: DFSchemaRef) -> Self { - self.schema = Some(schema); + self.schema = schema; self } -} -impl SimplifyInfo for SimplifyContext<'_> { - /// Returns true if this Expr has boolean type - fn is_boolean_type(&self, expr: &Expr) -> Result { - if let Some(schema) = &self.schema - && let Ok(DataType::Boolean) = expr.get_type(schema) - { - return Ok(true); - } + /// Set the query execution start time + pub fn with_query_execution_start_time( + mut self, + query_execution_start_time: Option>, + ) -> Self { + self.query_execution_start_time = query_execution_start_time; + self + } - Ok(false) + /// Set the query execution start to the current time + pub fn with_current_time(mut self) -> Self { + self.query_execution_start_time = Some(Utc::now()); + self + } + + /// Returns the schema + pub fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + /// Returns true if this Expr has boolean type + pub fn is_boolean_type(&self, expr: &Expr) -> Result { + Ok(expr.get_type(&self.schema)? == DataType::Boolean) } /// Returns true if expr is nullable - fn nullable(&self, expr: &Expr) -> Result { - let schema = self.schema.as_ref().ok_or_else(|| { - internal_datafusion_err!("attempt to get nullability without schema") - })?; - expr.nullable(schema.as_ref()) + pub fn nullable(&self, expr: &Expr) -> Result { + expr.nullable(self.schema.as_ref()) } /// Returns data type of this expr needed for determining optimized int type of a value - fn get_data_type(&self, expr: &Expr) -> Result { - let schema = self.schema.as_ref().ok_or_else(|| { - internal_datafusion_err!("attempt to get data type without schema") - })?; - expr.get_type(schema) + pub fn get_data_type(&self, expr: &Expr) -> Result { + expr.get_type(&self.schema) + } + + /// Returns the time at which the query execution started. + /// If `None`, time-dependent functions like `now()` will not be simplified. + pub fn query_execution_start_time(&self) -> Option> { + self.query_execution_start_time } - fn execution_props(&self) -> &ExecutionProps { - self.props + /// Returns the configuration options for the session. + pub fn config_options(&self) -> &Arc { + &self.config_options } } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index a69176e1173a5..ee38077dbf304 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -668,7 +668,7 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// Or, a closure with two arguments: /// * 'aggregate_function': [AggregateFunction] for which simplified has been invoked - /// * 'info': [crate::simplify::SimplifyInfo] + /// * 'info': [crate::simplify::SimplifyContext] /// /// closure returns simplified [Expr] or an error. /// diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 28a07ad761012..0654370ac7ebf 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -19,7 +19,7 @@ use crate::async_udf::AsyncScalarUDF; use crate::expr::schema_name_from_exprs_comma_separated_without_space; -use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; +use crate::simplify::{ExprSimplifyResult, SimplifyContext}; use crate::sort_properties::{ExprProperties, SortProperties}; use crate::udf_eq::UdfEq; use crate::{ColumnarValue, Documentation, Expr, Signature}; @@ -221,7 +221,7 @@ impl ScalarUDF { pub fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { self.inner.simplify(args, info) } @@ -691,7 +691,7 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { Ok(ExprSimplifyResult::Original(args)) } @@ -921,7 +921,7 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { self.inner.simplify(args, info) } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 37055daa1ca4f..8f2b8a0d9bfe5 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -362,7 +362,7 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// Or, a closure with two arguments: /// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked - /// * 'info': [crate::simplify::SimplifyInfo] + /// * 'info': [crate::simplify::SimplifyContext] /// /// # Notes /// The returned expression must have the same schema as the original diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs index a4e8332626b00..c82f03a3b5f0e 100644 --- a/datafusion/functions-aggregate/src/percentile_cont.rs +++ b/datafusion/functions-aggregate/src/percentile_cont.rs @@ -48,7 +48,7 @@ use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_expr::{ expr::{AggregateFunction, Sort}, function::{AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs}, - simplify::SimplifyInfo, + simplify::SimplifyContext, }; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::filtered_null_mask; @@ -309,7 +309,7 @@ fn get_percentile(args: &AccumulatorArgs) -> Result { fn simplify_percentile_cont_aggregate( aggregate_function: AggregateFunction, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { enum PercentileRewriteTarget { Min, diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 54b94abafb999..97671d4a95f23 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -125,7 +125,7 @@ impl ScalarUDFImpl for ArrayHas { fn simplify( &self, mut args: Vec, - _info: &dyn datafusion_expr::simplify::SimplifyInfo, + _info: &datafusion_expr::simplify::SimplifyContext, ) -> Result { let [haystack, needle] = take_function_args(self.name(), &mut args)?; @@ -684,8 +684,8 @@ mod tests { utils::SingleRowListArrayBuilder, }; use datafusion_expr::{ - ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col, - execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult, + ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col, lit, + simplify::ExprSimplifyResult, }; use crate::expr_fn::make_array; @@ -701,8 +701,7 @@ mod tests { .build_list_scalar()); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) @@ -725,8 +724,7 @@ mod tests { let haystack = make_array(vec![lit(1), lit(2), lit(3)]); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) @@ -749,8 +747,7 @@ mod tests { let haystack = Expr::Literal(ScalarValue::Null, None); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(simplified)) = ArrayHas::new().simplify(vec![haystack, needle], &context) else { @@ -767,8 +764,7 @@ mod tests { let haystack = Expr::Literal(ScalarValue::List(Arc::new(haystack)), None); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(simplified)) = ArrayHas::new().simplify(vec![haystack, needle], &context) else { @@ -783,8 +779,7 @@ mod tests { let haystack = col("c1"); let needle = col("c2"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Original(args)) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index 04189c0c6f363..7c24450adf183 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -26,7 +26,7 @@ use datafusion_common::{ use datafusion_common::{exec_datafusion_err, utils::take_function_args}; use std::any::Any; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, @@ -160,7 +160,7 @@ impl ScalarUDFImpl for ArrowCastFunc { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { // convert this into a real cast let target_type = data_type_from_args(&args)?; diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 1404f68570974..359a6f6c9c84c 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -19,7 +19,7 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{Result, exec_err, internal_err, plan_err}; use datafusion_expr::binary::try_type_union_resolution; use datafusion_expr::conditional_expressions::CaseBuilder; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, }; @@ -97,7 +97,7 @@ impl ScalarUDFImpl for CoalesceFunc { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { if args.is_empty() { return plan_err!("coalesce must have at least one argument"); diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index 3e961e4da4e75..47a903639dde5 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -421,7 +421,7 @@ impl ScalarUDFImpl for GetFieldFunc { fn simplify( &self, args: Vec, - _info: &dyn datafusion_expr::simplify::SimplifyInfo, + _info: &datafusion_expr::simplify::SimplifyContext, ) -> Result { // Need at least 2 args (base + field) if args.len() < 2 { diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 0b9968a88fc95..0b4966d4fbdce 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -18,7 +18,7 @@ use crate::core::coalesce::CoalesceFunc; use arrow::datatypes::{DataType, FieldRef}; use datafusion_common::Result; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, @@ -124,7 +124,7 @@ impl ScalarUDFImpl for NVLFunc { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { self.coalesce.simplify(args, info) } diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs index eda59fe07f57e..0b092c44d502b 100644 --- a/datafusion/functions/src/core/nvl2.rs +++ b/datafusion/functions/src/core/nvl2.rs @@ -21,7 +21,7 @@ use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, conditional_expressions::CaseBuilder, - simplify::{ExprSimplifyResult, SimplifyInfo}, + simplify::{ExprSimplifyResult, SimplifyContext}, type_coercion::binary::comparison_coercion, }; use datafusion_macros::user_doc; @@ -108,7 +108,7 @@ impl ScalarUDFImpl for NVL2Func { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { let [test, if_non_null, if_null] = take_function_args(self.name(), args)?; diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 7edc1a58d9cb5..3e38772720979 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -23,7 +23,7 @@ use arrow::datatypes::DataType::Date32; use chrono::{Datelike, NaiveDate, TimeZone}; use datafusion_common::{Result, ScalarValue, internal_err}; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; @@ -99,23 +99,20 @@ impl ScalarUDFImpl for CurrentDateFunc { fn simplify( &self, - _args: Vec, - info: &dyn SimplifyInfo, + args: Vec, + info: &SimplifyContext, ) -> Result { - let now_ts = info.execution_props().query_execution_start_time; + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; // Get timezone from config and convert to local time let days = info - .execution_props() .config_options() - .and_then(|config| { - config - .execution - .time_zone - .as_ref() - .map(|tz| tz.parse::().ok()) - }) - .flatten() + .execution + .time_zone + .as_ref() + .and_then(|tz| tz.parse::().ok()) .map_or_else( || datetime_to_days(&now_ts), |tz| { diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 2c9bcdfe49db3..855c0c13dc6b4 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -22,7 +22,7 @@ use arrow::datatypes::TimeUnit::Nanosecond; use chrono::TimeZone; use chrono::Timelike; use datafusion_common::{Result, ScalarValue, internal_err}; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; @@ -95,23 +95,20 @@ impl ScalarUDFImpl for CurrentTimeFunc { fn simplify( &self, - _args: Vec, - info: &dyn SimplifyInfo, + args: Vec, + info: &SimplifyContext, ) -> Result { - let now_ts = info.execution_props().query_execution_start_time; + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; // Try to get timezone from config and convert to local time let nano = info - .execution_props() .config_options() - .and_then(|config| { - config - .execution - .time_zone - .as_ref() - .map(|tz| tz.parse::().ok()) - }) - .flatten() + .execution + .time_zone + .as_ref() + .and_then(|tz| tz.parse::().ok()) .map_or_else( || datetime_to_time_nanos(&now_ts), |tz| { @@ -143,46 +140,24 @@ fn datetime_to_time_nanos(dt: &chrono::DateTime) -> Option Result { - Ok(false) - } - - fn nullable(&self, _expr: &Expr) -> Result { - Ok(true) - } - - fn execution_props(&self) -> &ExecutionProps { - &self.execution_props - } - - fn get_data_type(&self, _expr: &Expr) -> Result { - Ok(Time64(Nanosecond)) - } - } - - fn set_session_timezone_env(tz: &str, start_time: DateTime) -> MockSimplifyInfo { - let mut config = datafusion_common::config::ConfigOptions::default(); + fn set_session_timezone_env(tz: &str, start_time: DateTime) -> SimplifyContext { + let mut config = ConfigOptions::default(); config.execution.time_zone = if tz.is_empty() { None } else { Some(tz.to_string()) }; - let mut execution_props = - ExecutionProps::new().with_query_execution_start_time(start_time); - execution_props.config_options = Some(Arc::new(config)); - MockSimplifyInfo { execution_props } + let schema = Arc::new(DFSchema::empty()); + SimplifyContext::default() + .with_schema(schema) + .with_config_options(Arc::new(config)) + .with_query_execution_start_time(Some(start_time)) } #[test] diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index b804efe59106d..338a62a118f31 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use datafusion_common::config::ConfigOptions; use datafusion_common::{Result, ScalarValue, internal_err}; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, @@ -121,16 +121,18 @@ impl ScalarUDFImpl for NowFunc { fn simplify( &self, - _args: Vec, - info: &dyn SimplifyInfo, + args: Vec, + info: &SimplifyContext, ) -> Result { - let now_ts = info - .execution_props() - .query_execution_start_time - .timestamp_nanos_opt(); + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; Ok(ExprSimplifyResult::Simplified(Expr::Literal( - ScalarValue::TimestampNanosecond(now_ts, self.timezone.clone()), + ScalarValue::TimestampNanosecond( + now_ts.timestamp_nanos_opt(), + self.timezone.clone(), + ), None, ))) } diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 0c50afa2dffd3..fb57f24257e5b 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -36,7 +36,7 @@ use datafusion_common::{ Result, ScalarValue, exec_err, internal_err, plan_datafusion_err, plan_err, }; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, Expr, ScalarFunctionArgs, ScalarUDF, @@ -343,7 +343,7 @@ impl ScalarUDFImpl for LogFunc { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { let mut arg_types = args .iter() @@ -430,7 +430,6 @@ fn is_pow(func: &ScalarUDF) -> bool { #[cfg(test)] mod tests { - use std::collections::HashMap; use std::sync::Arc; use super::*; @@ -440,10 +439,8 @@ mod tests { }; use arrow::compute::SortOptions; use arrow::datatypes::{DECIMAL256_MAX_PRECISION, Field}; - use datafusion_common::DFSchema; use datafusion_common::cast::{as_float32_array, as_float64_array}; use datafusion_common::config::ConfigOptions; - use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; #[test] @@ -784,10 +781,7 @@ mod tests { #[test] // Test log() simplification errors fn test_log_simplify_errors() { - let props = ExecutionProps::new(); - let schema = - Arc::new(DFSchema::new_with_metadata(vec![], HashMap::new()).unwrap()); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::default(); // Expect 0 args to error let _ = LogFunc::new().simplify(vec![], &context).unwrap_err(); // Expect 3 args to error @@ -799,10 +793,7 @@ mod tests { #[test] // Test that non-simplifiable log() expressions are unchanged after simplification fn test_log_simplify_original() { - let props = ExecutionProps::new(); - let schema = - Arc::new(DFSchema::new_with_metadata(vec![], HashMap::new()).unwrap()); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::default(); // One argument with no simplifications let result = LogFunc::new().simplify(vec![lit(2)], &context).unwrap(); let ExprSimplifyResult::Original(args) = result else { diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index 33166f6444f2a..fafadd3ba4477 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -31,7 +31,7 @@ use datafusion_common::types::{NativeType, logical_float64, logical_int64}; use datafusion_common::utils::take_function_args; use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass, Volatility, lit, @@ -346,7 +346,7 @@ impl ScalarUDFImpl for PowerFunc { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { let [base, exponent] = take_function_args("power", args)?; let base_type = info.get_data_type(&base)?; diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index f707c8e0d8c7f..bc06d462c04bd 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -31,7 +31,7 @@ use datafusion_expr::{ }; use datafusion_macros::user_doc; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr_common::operator::Operator; use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer; use std::any::Any; @@ -158,7 +158,7 @@ impl ScalarUDFImpl for RegexpLikeFunc { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { // Try to simplify regexp_like usage to one of the builtin operators since those have // optimized code paths for the case where the regular expression pattern is a scalar. diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index 42d455a05760a..80de194ad15e3 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -28,7 +28,7 @@ use crate::strings::{ use datafusion_common::cast::{as_string_array, as_string_view_array}; use datafusion_common::{Result, ScalarValue, internal_err, plan_err}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ColumnarValue, Documentation, Expr, Volatility, lit}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature}; use datafusion_macros::user_doc; @@ -277,7 +277,7 @@ impl ScalarUDFImpl for ConcatFunc { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { simplify_concat(args) } diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index 8fe095c5ce2be..8ed3ade968cc2 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -28,7 +28,7 @@ use crate::strings::{ColumnarValueRef, StringArrayBuilder}; use datafusion_common::cast::{as_string_array, as_string_view_array}; use datafusion_common::{Result, ScalarValue, exec_err, internal_err, plan_err}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ColumnarValue, Documentation, Expr, Volatility, lit}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature}; use datafusion_macros::user_doc; @@ -301,7 +301,7 @@ impl ScalarUDFImpl for ConcatWsFunc { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { match &args[..] { [delimiter, vals @ ..] => simplify_concat_ws(delimiter, vals), diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index c38a5bffcb2be..e50bd9f657669 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -21,14 +21,13 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Scalar}; use arrow::compute::kernels::comparison::starts_with as arrow_starts_with; use arrow::datatypes::DataType; +use datafusion_common::types::logical_string; use datafusion_common::utils::take_function_args; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_common::{Result, ScalarValue, exec_err}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::type_coercion::binary::{ binary_to_string_coercion, string_coercion, }; - -use datafusion_common::types::logical_string; -use datafusion_common::{Result, ScalarValue, exec_err}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, Expr, Like, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, cast, @@ -164,7 +163,7 @@ impl ScalarUDFImpl for StartsWithFunc { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { if let Expr::Literal(scalar_value, _) = &args[1] { // Convert starts_with(col, 'prefix') to col LIKE 'prefix%' with proper escaping diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index e8a9c8c83ae93..52d777f874fa8 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -36,7 +36,6 @@ use datafusion_expr::{ BinaryExpr, Cast, EmptyRelation, Expr, FetchType, LogicalPlan, LogicalPlanBuilder, Operator, expr, lit, }; -use datafusion_physical_expr::execution_props::ExecutionProps; /// This struct rewrite the sub query plan by pull up the correlated /// expressions(contains outer reference columns) from the inner subquery's @@ -509,8 +508,7 @@ fn agg_exprs_evaluation_result_on_empty_batch( .data()?; let result_expr = result_expr.unalias(); - let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(Arc::clone(schema)); + let info = SimplifyContext::default().with_schema(Arc::clone(schema)); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; expr_result_map_for_count_bug.insert(e.schema_name().to_string(), result_expr); @@ -543,8 +541,7 @@ fn proj_exprs_evaluation_result_on_empty_batch( .data()?; if result_expr.ne(expr) { - let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(Arc::clone(schema)); + let info = SimplifyContext::default().with_schema(Arc::clone(schema)); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; let expr_name = match expr { @@ -584,8 +581,7 @@ fn filter_exprs_evaluation_result_on_empty_batch( .data()?; let pull_up_expr = if result_expr.ne(filter_expr) { - let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(schema); + let info = SimplifyContext::default().with_schema(schema); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; match &result_expr { diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index ededcec0a47c9..8740ab072a1f5 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -100,8 +100,9 @@ pub trait OptimizerRule: Debug { /// Options to control the DataFusion Optimizer. pub trait OptimizerConfig { /// Return the time at which the query execution started. This - /// time is used as the value for now() - fn query_execution_start_time(&self) -> DateTime; + /// time is used as the value for `now()`. If `None`, time-dependent + /// functions like `now()` will not be simplified during optimization. + fn query_execution_start_time(&self) -> Option>; /// Return alias generator used to generate unique aliases for subqueries fn alias_generator(&self) -> &Arc; @@ -118,8 +119,9 @@ pub trait OptimizerConfig { #[derive(Debug)] pub struct OptimizerContext { /// Query execution start time that can be used to rewrite - /// expressions such as `now()` to use a literal value instead - query_execution_start_time: DateTime, + /// expressions such as `now()` to use a literal value instead. + /// If `None`, time-dependent functions will not be simplified. + query_execution_start_time: Option>, /// Alias generator used to generate unique aliases for subqueries alias_generator: Arc, @@ -139,7 +141,7 @@ impl OptimizerContext { /// Create a optimizer config with provided [ConfigOptions]. pub fn new_with_config_options(options: Arc) -> Self { Self { - query_execution_start_time: Utc::now(), + query_execution_start_time: Some(Utc::now()), alias_generator: Arc::new(AliasGenerator::new()), options, } @@ -153,13 +155,19 @@ impl OptimizerContext { self } - /// Specify whether the optimizer should skip rules that produce - /// errors, or fail the query + /// Set the query execution start time pub fn with_query_execution_start_time( mut self, - query_execution_tart_time: DateTime, + query_execution_start_time: DateTime, ) -> Self { - self.query_execution_start_time = query_execution_tart_time; + self.query_execution_start_time = Some(query_execution_start_time); + self + } + + /// Clear the query execution start time. When `None`, time-dependent + /// functions like `now()` will not be simplified during optimization. + pub fn without_query_execution_start_time(mut self) -> Self { + self.query_execution_start_time = None; self } @@ -185,7 +193,7 @@ impl Default for OptimizerContext { } impl OptimizerConfig for OptimizerContext { - fn query_execution_start_time(&self) -> DateTime { + fn query_execution_start_time(&self) -> Option> { self.query_execution_start_time } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 01de44cee1f60..55bff5849c5cb 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -27,6 +27,7 @@ use std::collections::HashSet; use std::ops::Not; use std::sync::Arc; +use datafusion_common::config::ConfigOptions; use datafusion_common::{ DFSchema, DataFusionError, Result, ScalarValue, exec_datafusion_err, internal_err, }; @@ -51,7 +52,7 @@ use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionP use super::inlist_simplifier::ShortenInListSimplifier; use super::utils::*; use crate::analyzer::type_coercion::TypeCoercionRewriter; -use crate::simplify_expressions::SimplifyInfo; +use crate::simplify_expressions::SimplifyContext; use crate::simplify_expressions::regex::simplify_regex_expr; use crate::simplify_expressions::unwrap_cast::{ is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary, @@ -72,7 +73,6 @@ use regex::Regex; /// ``` /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_common::{DataFusionError, ToDFSchema}; -/// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_expr::{col, lit}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; @@ -83,8 +83,7 @@ use regex::Regex; /// .unwrap(); /// /// // Create the simplifier -/// let props = ExecutionProps::new(); -/// let context = SimplifyContext::new(&props).with_schema(schema); +/// let context = SimplifyContext::default().with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Use the simplifier @@ -96,8 +95,8 @@ use regex::Regex; /// let simplified = simplifier.simplify(expr).unwrap(); /// assert_eq!(simplified, col("b").lt(lit(2))); /// ``` -pub struct ExprSimplifier { - info: S, +pub struct ExprSimplifier { + info: SimplifyContext, /// Guarantees about the values of columns. This is provided by the user /// in [ExprSimplifier::with_guarantees()]. guarantees: Vec<(Expr, NullableInterval)>, @@ -111,13 +110,12 @@ pub struct ExprSimplifier { pub const THRESHOLD_INLINE_INLIST: usize = 3; pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3; -impl ExprSimplifier { - /// Create a new `ExprSimplifier` with the given `info` such as an - /// instance of [`SimplifyContext`]. See - /// [`simplify`](Self::simplify) for an example. +impl ExprSimplifier { + /// Create a new `ExprSimplifier` with the given [`SimplifyContext`]. + /// See [`simplify`](Self::simplify) for an example. /// /// [`SimplifyContext`]: datafusion_expr::simplify::SimplifyContext - pub fn new(info: S) -> Self { + pub fn new(info: SimplifyContext) -> Self { Self { info, guarantees: vec![], @@ -142,40 +140,21 @@ impl ExprSimplifier { /// `b > 2` /// /// ``` - /// use arrow::datatypes::DataType; - /// use datafusion_common::DFSchema; + /// use arrow::datatypes::{DataType, Field, Schema}; + /// use datafusion_common::{DFSchema, ToDFSchema}; /// use datafusion_common::Result; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; - /// use datafusion_expr::simplify::SimplifyInfo; /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// use std::sync::Arc; /// - /// /// Simple implementation that provides `Simplifier` the information it needs - /// /// See SimplifyContext for a structure that does this. - /// #[derive(Default)] - /// struct Info { - /// execution_props: ExecutionProps, - /// }; - /// - /// impl SimplifyInfo for Info { - /// fn is_boolean_type(&self, expr: &Expr) -> Result { - /// Ok(false) - /// } - /// fn nullable(&self, expr: &Expr) -> Result { - /// Ok(true) - /// } - /// fn execution_props(&self) -> &ExecutionProps { - /// &self.execution_props - /// } - /// fn get_data_type(&self, expr: &Expr) -> Result { - /// Ok(DataType::Int32) - /// } - /// } - /// + /// // Create a schema and SimplifyContext + /// let schema = Schema::new(vec![Field::new("b", DataType::Int32, true)]) + /// .to_dfschema_ref() + /// .unwrap(); /// // Create the simplifier - /// let simplifier = ExprSimplifier::new(Info::default()); + /// let context = SimplifyContext::default().with_schema(schema); + /// let simplifier = ExprSimplifier::new(context); /// /// // b < 2 /// let b_lt_2 = col("b").gt(lit(2)); @@ -225,7 +204,8 @@ impl ExprSimplifier { mut expr: Expr, ) -> Result<(Transformed, u32)> { let mut simplifier = Simplifier::new(&self.info); - let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?; + let config_options = Some(Arc::clone(self.info.config_options())); + let mut const_evaluator = ConstEvaluator::try_new(config_options)?; let mut shorten_in_list_simplifier = ShortenInListSimplifier::new(); let guarantees_map: HashMap<&Expr, &NullableInterval> = self.guarantees.iter().map(|(k, v)| (k, v)).collect(); @@ -287,7 +267,6 @@ impl ExprSimplifier { /// ```rust /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_expr::{col, lit, Expr}; @@ -302,8 +281,7 @@ impl ExprSimplifier { /// .unwrap(); /// /// // Create the simplifier - /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props).with_schema(schema); + /// let context = SimplifyContext::default().with_schema(schema); /// /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5) /// let expr_x = col("x").gt_eq(lit(3_i64)); @@ -349,7 +327,6 @@ impl ExprSimplifier { /// ```rust /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_expr::{col, lit, Expr}; @@ -364,8 +341,7 @@ impl ExprSimplifier { /// .unwrap(); /// /// // Create the simplifier - /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props).with_schema(schema); + /// let context = SimplifyContext::default().with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Expression: a = c AND 1 = b @@ -410,7 +386,6 @@ impl ExprSimplifier { /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// @@ -420,9 +395,7 @@ impl ExprSimplifier { /// .to_dfschema_ref().unwrap(); /// /// // Create the simplifier - /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props) - /// .with_schema(schema); + /// let context = SimplifyContext::default().with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Expression: a IS NOT NULL @@ -500,7 +473,7 @@ impl TreeNodeRewriter for Canonicalizer { /// /// Note it does not handle algebraic rewrites such as `(a or false)` /// --> `a`, which is handled by [`Simplifier`] -struct ConstEvaluator<'a> { +struct ConstEvaluator { /// `can_evaluate` is used during the depth-first-search of the /// `Expr` tree to track if any siblings (or their descendants) were /// non evaluatable (e.g. had a column reference or volatile @@ -514,8 +487,13 @@ struct ConstEvaluator<'a> { /// means there were no non evaluatable siblings (or their /// descendants) so this `Expr` can be evaluated can_evaluate: Vec, - - execution_props: &'a ExecutionProps, + /// Execution properties needed to call [`create_physical_expr`]. + /// `ConstEvaluator` only evaluates expressions without column references + /// (i.e. constant expressions) and doesn't use the variable binding features + /// of `ExecutionProps` (we explicitly filter out [`Expr::ScalarVariable`]). + /// The `config_options` are passed from the session to allow scalar functions + /// to access configuration like timezone. + execution_props: ExecutionProps, input_schema: DFSchema, input_batch: RecordBatch, } @@ -530,7 +508,7 @@ enum ConstSimplifyResult { SimplifyRuntimeError(DataFusionError, Expr), } -impl TreeNodeRewriter for ConstEvaluator<'_> { +impl TreeNodeRewriter for ConstEvaluator { type Node = Expr; fn f_down(&mut self, expr: Expr) -> Result> { @@ -593,11 +571,17 @@ impl TreeNodeRewriter for ConstEvaluator<'_> { } } -impl<'a> ConstEvaluator<'a> { - /// Create a new `ConstantEvaluator`. Session constants (such as - /// the time for `now()` are taken from the passed - /// `execution_props`. - pub fn try_new(execution_props: &'a ExecutionProps) -> Result { +impl ConstEvaluator { + /// Create a new `ConstantEvaluator`. + /// + /// Note: `ConstEvaluator` filters out expressions with scalar variables + /// (like `$var`) and volatile functions, so it creates its own default + /// `ExecutionProps` internally. The filtered expressions will be evaluated + /// at runtime where proper variable bindings are available. + /// + /// The `config_options` parameter is used to pass session configuration + /// (like timezone) to scalar functions during constant evaluation. + pub fn try_new(config_options: Option>) -> Result { // The dummy column name is unused and doesn't matter as only // expressions without column references can be evaluated static DUMMY_COL_NAME: &str = "."; @@ -611,6 +595,9 @@ impl<'a> ConstEvaluator<'a> { let col = new_null_array(&DataType::Null, 1); let input_batch = RecordBatch::try_new(schema, vec![col])?; + let mut execution_props = ExecutionProps::new(); + execution_props.config_options = config_options; + Ok(Self { can_evaluate: vec![], execution_props, @@ -684,11 +671,14 @@ impl<'a> ConstEvaluator<'a> { return ConstSimplifyResult::NotSimplified(s, m); } - let phys_expr = - match create_physical_expr(&expr, &self.input_schema, self.execution_props) { - Ok(e) => e, - Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), - }; + let phys_expr = match create_physical_expr( + &expr, + &self.input_schema, + &self.execution_props, + ) { + Ok(e) => e, + Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), + }; let metadata = phys_expr .return_field(self.input_batch.schema_ref()) .ok() @@ -745,17 +735,17 @@ impl<'a> ConstEvaluator<'a> { /// * `false = true` and `true = false` to `false` /// * `!!expr` to `expr` /// * `expr = null` and `expr != null` to `null` -struct Simplifier<'a, S> { - info: &'a S, +struct Simplifier<'a> { + info: &'a SimplifyContext, } -impl<'a, S> Simplifier<'a, S> { - pub fn new(info: &'a S) -> Self { +impl<'a> Simplifier<'a> { + pub fn new(info: &'a SimplifyContext) -> Self { Self { info } } } -impl TreeNodeRewriter for Simplifier<'_, S> { +impl TreeNodeRewriter for Simplifier<'_> { type Node = Expr; /// rewrite the expression simplifying any constant expressions @@ -2117,7 +2107,7 @@ fn inlist_except(mut l1: InList, l2: &InList) -> Result { } /// Returns expression testing a boolean `expr` for being exactly `true` (not `false` or NULL). -fn is_exactly_true(expr: Expr, info: &impl SimplifyInfo) -> Result { +fn is_exactly_true(expr: Expr, info: &SimplifyContext) -> Result { if !info.nullable(&expr)? { Ok(expr) } else { @@ -2133,8 +2123,8 @@ fn is_exactly_true(expr: Expr, info: &impl SimplifyInfo) -> Result { // A / 1 -> A // // Move this function body out of the large match branch avoid stack overflow -fn simplify_right_is_one_case( - info: &S, +fn simplify_right_is_one_case( + info: &SimplifyContext, left: Box, op: &Operator, right: &Expr, @@ -2187,9 +2177,8 @@ mod tests { // ------------------------------ #[test] fn api_basic() { - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); let expr = lit(1) + lit(2); let expected = lit(3); @@ -2199,9 +2188,8 @@ mod tests { #[test] fn basic_coercion() { let schema = test_schema(); - let props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&props).with_schema(Arc::clone(&schema)), + SimplifyContext::default().with_schema(Arc::clone(&schema)), ); // Note expr type is int32 (not int64) @@ -2229,9 +2217,8 @@ mod tests { #[test] fn simplify_and_constant_prop() { - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); // should be able to simplify to false // (i * (1 - 2)) > 0 @@ -2242,9 +2229,8 @@ mod tests { #[test] fn simplify_and_constant_prop_with_case() { - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); // CASE // WHEN i>5 AND false THEN i > 5 @@ -3358,18 +3344,15 @@ mod tests { fn try_simplify(expr: Expr) -> Result { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), - ); + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); simplifier.simplify(expr) } fn coerce(expr: Expr) -> Expr { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)), + SimplifyContext::default().with_schema(Arc::clone(&schema)), ); simplifier.coerce(expr, schema.as_ref()).unwrap() } @@ -3380,10 +3363,8 @@ mod tests { fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), - ); + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?; Ok((expr.data, count)) } @@ -3397,11 +3378,9 @@ mod tests { guarantees: Vec<(Expr, NullableInterval)>, ) -> Expr { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), - ) - .with_guarantees(guarantees); + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)) + .with_guarantees(guarantees); simplifier.simplify(expr).unwrap() } @@ -4303,8 +4282,7 @@ mod tests { fn just_simplifier_simplify_null_in_empty_inlist() { let simplify = |expr: Expr| -> Expr { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let info = SimplifyContext::new(&execution_props).with_schema(schema); + let info = SimplifyContext::default().with_schema(schema); let simplifier = &mut Simplifier::new(&info); expr.rewrite(simplifier) .expect("Failed to simplify expression") @@ -4670,10 +4648,9 @@ mod tests { #[test] fn simplify_common_factor_conjunction_in_disjunction() { - let props = ExecutionProps::new(); let schema = boolean_test_schema(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema)); + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); let a = || col("A"); let b = || col("B"); @@ -5003,9 +4980,8 @@ mod tests { // The simplification should now fail with an error at plan time let schema = test_schema(); - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema)); + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); let result = simplifier.simplify(expr); assert!(result.is_err(), "Expected error for invalid cast"); let err_msg = result.unwrap_err().to_string(); diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs b/datafusion/optimizer/src/simplify_expressions/mod.rs index 58a4eadb5c078..3ab76119cca84 100644 --- a/datafusion/optimizer/src/simplify_expressions/mod.rs +++ b/datafusion/optimizer/src/simplify_expressions/mod.rs @@ -28,7 +28,7 @@ mod unwrap_cast; mod utils; // backwards compatibility -pub use datafusion_expr::simplify::{SimplifyContext, SimplifyInfo}; +pub use datafusion_expr::simplify::SimplifyContext; pub use expr_simplifier::*; pub use simplify_exprs::*; diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 1b25c5ce8a632..f7f100015004a 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -22,7 +22,6 @@ use std::sync::Arc; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result}; use datafusion_expr::Expr; -use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::logical_plan::LogicalPlan; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::merge_schema; @@ -67,17 +66,14 @@ impl OptimizerRule for SimplifyExpressions { plan: LogicalPlan, config: &dyn OptimizerConfig, ) -> Result, DataFusionError> { - let mut execution_props = ExecutionProps::new(); - execution_props.query_execution_start_time = config.query_execution_start_time(); - execution_props.config_options = Some(config.options()); - Self::optimize_internal(plan, &execution_props) + Self::optimize_internal(plan, config) } } impl SimplifyExpressions { fn optimize_internal( plan: LogicalPlan, - execution_props: &ExecutionProps, + config: &dyn OptimizerConfig, ) -> Result> { let schema = if !plan.inputs().is_empty() { DFSchemaRef::new(merge_schema(&plan.inputs())) @@ -100,7 +96,10 @@ impl SimplifyExpressions { Arc::new(DFSchema::empty()) }; - let info = SimplifyContext::new(execution_props).with_schema(schema); + let info = SimplifyContext::default() + .with_schema(schema) + .with_config_options(config.options()) + .with_query_execution_start_time(config.query_execution_start_time()); // Inputs have already been rewritten (due to bottom-up traversal handled by Optimizer) // Just need to rewrite our own expressions diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_literal.rs b/datafusion/optimizer/src/simplify_expressions/simplify_literal.rs index 168a6ebb461f0..b77240fc5343a 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_literal.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_literal.rs @@ -28,7 +28,6 @@ use datafusion_common::{ plan_err, }; use datafusion_expr::Expr; -use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; use std::sync::Arc; @@ -52,10 +51,8 @@ where log::debug!("Parsing expr {:?} to type {}", expr, T::DATA_TYPE); - let execution_props = ExecutionProps::new(); - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)), - ); + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(Arc::clone(&schema))); // Simplify and coerce expression in case of constant arithmetic operations (e.g., 10 + 5) let simplified_expr: Expr = simplifier diff --git a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs index b2349db8c4605..acf0f32ab2234 100644 --- a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs +++ b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs @@ -58,11 +58,11 @@ use arrow::datatypes::DataType; use datafusion_common::{Result, ScalarValue}; use datafusion_common::{internal_err, tree_node::Transformed}; use datafusion_expr::{BinaryExpr, lit}; -use datafusion_expr::{Cast, Expr, Operator, TryCast, simplify::SimplifyInfo}; +use datafusion_expr::{Cast, Expr, Operator, TryCast, simplify::SimplifyContext}; use datafusion_expr_common::casts::{is_supported_type, try_cast_literal_to_type}; -pub(super) fn unwrap_cast_in_comparison_for_binary( - info: &S, +pub(super) fn unwrap_cast_in_comparison_for_binary( + info: &SimplifyContext, cast_expr: Expr, literal: Expr, op: Operator, @@ -104,10 +104,8 @@ pub(super) fn unwrap_cast_in_comparison_for_binary( } } -pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary< - S: SimplifyInfo, ->( - info: &S, +pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary( + info: &SimplifyContext, expr: &Expr, op: Operator, literal: &Expr, @@ -142,10 +140,8 @@ pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary< } } -pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist< - S: SimplifyInfo, ->( - info: &S, +pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist( + info: &SimplifyContext, expr: &Expr, list: &[Expr], ) -> bool { @@ -241,7 +237,6 @@ mod tests { use crate::simplify_expressions::ExprSimplifier; use arrow::datatypes::{Field, TimeUnit}; use datafusion_common::{DFSchema, DFSchemaRef}; - use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{cast, col, in_list, try_cast}; @@ -592,9 +587,8 @@ mod tests { } fn optimize_test(expr: Expr, schema: &DFSchemaRef) -> Expr { - let props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&props).with_schema(Arc::clone(schema)), + SimplifyContext::default().with_schema(Arc::clone(schema)), ); simplifier.simplify(expr).unwrap() diff --git a/datafusion/spark/src/function/conditional/if.rs b/datafusion/spark/src/function/conditional/if.rs index 906b0bc312f2f..e423f8264ecca 100644 --- a/datafusion/spark/src/function/conditional/if.rs +++ b/datafusion/spark/src/function/conditional/if.rs @@ -86,7 +86,7 @@ impl ScalarUDFImpl for SparkIf { fn simplify( &self, args: Vec, - _info: &dyn datafusion_expr::simplify::SimplifyInfo, + _info: &datafusion_expr::simplify::SimplifyContext, ) -> Result { let condition = args[0].clone(); let then_expr = args[1].clone(); diff --git a/datafusion/wasmtest/src/lib.rs b/datafusion/wasmtest/src/lib.rs index b20e6c24ffeaa..c5948bd7343a6 100644 --- a/datafusion/wasmtest/src/lib.rs +++ b/datafusion/wasmtest/src/lib.rs @@ -24,14 +24,12 @@ extern crate wasm_bindgen; -use datafusion_common::{DFSchema, ScalarValue}; -use datafusion_expr::execution_props::ExecutionProps; +use datafusion_common::ScalarValue; use datafusion_expr::lit; use datafusion_expr::simplify::SimplifyContext; use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_sql::sqlparser::dialect::GenericDialect; use datafusion_sql::sqlparser::parser::Parser; -use std::sync::Arc; use wasm_bindgen::prelude::*; pub fn set_panic_hook() { // When the `console_error_panic_hook` feature is enabled, we can call the @@ -63,10 +61,7 @@ pub fn basic_exprs() { log(&format!("Expr: {expr:?}")); // Simplify Expr (using datafusion-phys-expr and datafusion-optimizer) - let schema = Arc::new(DFSchema::empty()); - let execution_props = ExecutionProps::new(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&execution_props).with_schema(schema)); + let simplifier = ExprSimplifier::new(SimplifyContext::default()); let simplified_expr = simplifier.simplify(expr).unwrap(); log(&format!("Simplified Expr: {simplified_expr:?}")); } @@ -82,6 +77,8 @@ pub fn basic_parse() { #[cfg(test)] mod test { + use std::sync::Arc; + use super::*; use datafusion::{ arrow::{ diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 61246f00dfe74..a308e69310822 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -19,6 +19,105 @@ # Upgrade Guides +## DataFusion `53.0.0` + +**Note:** DataFusion `53.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version. + +### `SimplifyInfo` trait removed, `SimplifyContext` now uses builder-style API + +The `SimplifyInfo` trait has been removed and replaced with the concrete `SimplifyContext` struct. This simplifies the expression simplification API and removes the need for trait objects. + +**Who is affected:** + +- Users who implemented custom `SimplifyInfo` implementations +- Users who implemented `ScalarUDFImpl::simplify()` for custom scalar functions +- Users who directly use `SimplifyContext` or `ExprSimplifier` + +**Breaking changes:** + +1. The `SimplifyInfo` trait has been removed entirely +2. `SimplifyContext` no longer takes `&ExecutionProps` - it now uses a builder-style API with direct fields +3. `ScalarUDFImpl::simplify()` now takes `&SimplifyContext` instead of `&dyn SimplifyInfo` +4. Time-dependent function simplification (e.g., `now()`) is now optional - if `query_execution_start_time` is `None`, these functions won't be simplified + +**Migration guide:** + +If you implemented a custom `SimplifyInfo`: + +**Before:** + +```rust,ignore +impl SimplifyInfo for MySimplifyInfo { + fn is_boolean_type(&self, expr: &Expr) -> Result { ... } + fn nullable(&self, expr: &Expr) -> Result { ... } + fn execution_props(&self) -> &ExecutionProps { ... } + fn get_data_type(&self, expr: &Expr) -> Result { ... } +} +``` + +**After:** + +Use `SimplifyContext` directly with the builder-style API: + +```rust,ignore +let context = SimplifyContext::default() + .with_schema(schema) + .with_config_options(config_options) + .with_query_execution_start_time(Some(Utc::now())); // or use .with_current_time() +``` + +If you implemented `ScalarUDFImpl::simplify()`: + +**Before:** + +```rust,ignore +fn simplify( + &self, + args: Vec, + info: &dyn SimplifyInfo, +) -> Result { + let now_ts = info.execution_props().query_execution_start_time; + // ... +} +``` + +**After:** + +```rust,ignore +fn simplify( + &self, + args: Vec, + info: &SimplifyContext, +) -> Result { + // query_execution_start_time is now Option> + // Return Original if time is not set (simplification skipped) + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; + // ... +} +``` + +If you created `SimplifyContext` from `ExecutionProps`: + +**Before:** + +```rust,ignore +let props = ExecutionProps::new(); +let context = SimplifyContext::new(&props).with_schema(schema); +``` + +**After:** + +```rust,ignore +let context = SimplifyContext::default() + .with_schema(schema) + .with_config_options(config_options) + .with_current_time(); // Sets query_execution_start_time to Utc::now() +``` + +See [`SimplifyContext` documentation](https://docs.rs/datafusion-expr/latest/datafusion_expr/simplify/struct.SimplifyContext.html) for more details. + ## DataFusion `52.0.0` **Note:** DataFusion `52.0.0` has not been released yet. The information provided in this section pertains to features and changes that have already been merged to the main branch and are awaiting release in this version.