diff --git a/Cargo.lock b/Cargo.lock index a20aad4ec5aa..fb9b7e167f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5823,9 +5823,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.54.0" +version = "0.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" +checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" dependencies = [ "log", "recursive", diff --git a/Cargo.toml b/Cargo.toml index cd8c3e098b99..1a7a915ee89c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,7 +162,7 @@ recursive = "0.1.1" regex = "1.8" rstest = "0.24.0" serde_json = "1" -sqlparser = { version = "0.54.0", features = ["visitor"] } +sqlparser = { version = "0.55.0", features = ["visitor"] } tempfile = "3" tokio = { version = "1.44", features = ["macros", "rt", "sync"] } url = "2.5.4" diff --git a/datafusion/datasource/src/statistics.rs b/datafusion/datasource/src/statistics.rs index 9df5aa993d43..cd002a96683a 100644 --- a/datafusion/datasource/src/statistics.rs +++ b/datafusion/datasource/src/statistics.rs @@ -30,7 +30,7 @@ use arrow::{ compute::SortColumn, row::{Row, Rows}, }; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; use datafusion_physical_expr::{expressions::Column, PhysicalSortExpr}; use datafusion_physical_expr_common::sort_expr::LexOrdering; @@ -202,10 +202,10 @@ impl MinMaxStatistics { .zip(max_values.column_by_name(column.name())) } .ok_or_else(|| { - DataFusionError::Plan(format!( + plan_datafusion_err!( "missing column in MinMaxStatistics::new: '{}'", column.name() - )) + ) }) }) .collect::>>()? diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 82b33650523b..8771b25137cf 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -29,7 +29,7 @@ use std::fmt::{self, Formatter}; use std::hash::Hash; use datafusion_common::{plan_err, sql_err, DataFusionError, Result, ScalarValue}; -use sqlparser::ast; +use sqlparser::ast::{self, ValueWithSpan}; use sqlparser::parser::ParserError::ParserError; /// The frame specification determines which output rows are read by an aggregate @@ -368,7 +368,7 @@ fn convert_frame_bound_to_scalar_value( match units { // For ROWS and GROUPS we are sure that the ScalarValue must be a non-negative integer ... ast::WindowFrameUnits::Rows | ast::WindowFrameUnits::Groups => match v { - ast::Expr::Value(ast::Value::Number(value, false)) => { + ast::Expr::Value(ValueWithSpan{value: ast::Value::Number(value, false), span: _}) => { Ok(ScalarValue::try_from_string(value, &DataType::UInt64)?) }, ast::Expr::Interval(ast::Interval { @@ -379,7 +379,7 @@ fn convert_frame_bound_to_scalar_value( fractional_seconds_precision: None, }) => { let value = match *value { - ast::Expr::Value(ast::Value::SingleQuotedString(item)) => item, + ast::Expr::Value(ValueWithSpan{value: ast::Value::SingleQuotedString(item), span: _}) => item, e => { return sql_err!(ParserError(format!( "INTERVAL expression cannot be {e:?}" @@ -395,14 +395,14 @@ fn convert_frame_bound_to_scalar_value( // ... instead for RANGE it could be anything depending on the type of the ORDER BY clause, // so we use a ScalarValue::Utf8. 
ast::WindowFrameUnits::Range => Ok(ScalarValue::Utf8(Some(match v { - ast::Expr::Value(ast::Value::Number(value, false)) => value, + ast::Expr::Value(ValueWithSpan{value: ast::Value::Number(value, false), span: _}) => value, ast::Expr::Interval(ast::Interval { value, leading_field, .. }) => { let result = match *value { - ast::Expr::Value(ast::Value::SingleQuotedString(item)) => item, + ast::Expr::Value(ValueWithSpan{value: ast::Value::SingleQuotedString(item), span: _}) => item, e => { return sql_err!(ParserError(format!( "INTERVAL expression cannot be {e:?}" @@ -514,10 +514,10 @@ mod tests { let window_frame = ast::WindowFrame { units: ast::WindowFrameUnits::Rows, start_bound: ast::WindowFrameBound::Preceding(Some(Box::new( - ast::Expr::Value(ast::Value::Number("2".to_string(), false)), + ast::Expr::value(ast::Value::Number("2".to_string(), false)), ))), end_bound: Some(ast::WindowFrameBound::Preceding(Some(Box::new( - ast::Expr::Value(ast::Value::Number("1".to_string(), false)), + ast::Expr::value(ast::Value::Number("1".to_string(), false)), )))), }; @@ -575,10 +575,9 @@ mod tests { test_bound!(Range, None, ScalarValue::Null); // Number - let number = Some(Box::new(ast::Expr::Value(ast::Value::Number( - "42".to_string(), - false, - )))); + let number = Some(Box::new(ast::Expr::Value( + ast::Value::Number("42".to_string(), false).into(), + ))); test_bound!(Rows, number.clone(), ScalarValue::UInt64(Some(42))); test_bound!(Groups, number.clone(), ScalarValue::UInt64(Some(42))); test_bound!( @@ -589,9 +588,9 @@ mod tests { // Interval let number = Some(Box::new(ast::Expr::Interval(ast::Interval { - value: Box::new(ast::Expr::Value(ast::Value::SingleQuotedString( - "1".to_string(), - ))), + value: Box::new(ast::Expr::Value( + ast::Value::SingleQuotedString("1".to_string()).into(), + )), leading_field: Some(ast::DateTimeField::Day), fractional_seconds_precision: None, last_field: None, diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index b77bfab96a37..436f4388d8a3 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -30,7 +30,7 @@ use datafusion_expr::{ use sqlparser::ast::{ DuplicateTreatment, Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, - NullTreatment, ObjectName, OrderByExpr, WindowType, + NullTreatment, ObjectName, OrderByExpr, Spanned, WindowType, }; /// Suggest a valid function based on an invalid input function name @@ -216,13 +216,21 @@ impl<S: ContextProvider> SqlToRel<'_, S> { // it shouldn't have ordering requirement as function argument // required ordering should be defined in OVER clause. let is_function_window = over.is_some(); - let sql_parser_span = name.0[0].span; + let sql_parser_span = name.0[0].span(); let name = if name.0.len() > 1 { // DF doesn't handle compound identifiers // (e.g. 
"foo.bar") for function names yet name.to_string() } else { - crate::utils::normalize_ident(name.0[0].clone()) + match name.0[0].as_ident() { + Some(ident) => crate::utils::normalize_ident(ident.clone()), + None => { + return plan_err!( + "Expected an identifier in function name, but found {:?}", + name.0[0] + ) + } + } }; if name.eq("make_map") { diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 7d358d0b6624..7c276ce53e35 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -22,7 +22,7 @@ use datafusion_common::{ }; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{Case, Expr}; -use sqlparser::ast::{Expr as SQLExpr, Ident}; +use sqlparser::ast::{CaseWhen, Expr as SQLExpr, Ident}; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_expr::UNNAMED_TABLE; @@ -216,8 +216,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { pub(super) fn sql_case_identifier_to_expr( &self, operand: Option<Box<SQLExpr>>, - conditions: Vec<SQLExpr>, - results: Vec<SQLExpr>, + conditions: Vec<CaseWhen>, else_result: Option<Box<SQLExpr>>, schema: &DFSchema, planner_context: &mut PlannerContext, @@ -231,13 +230,22 @@ impl<S: ContextProvider> SqlToRel<'_, S> { } else { None }; - let when_expr = conditions + let when_then_expr = conditions .into_iter() - .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context)) - .collect::<Result<Vec<_>>>()?; - let then_expr = results - .into_iter() - .map(|e| self.sql_expr_to_logical_expr(e, schema, planner_context)) + .map(|e| { + Ok(( + Box::new(self.sql_expr_to_logical_expr( + e.condition, + schema, + planner_context, + )?), + Box::new(self.sql_expr_to_logical_expr( + e.result, + schema, + planner_context, + )?), + )) + }) .collect::<Result<Vec<_>>>()?; let else_expr = if let Some(e) = else_result { Some(Box::new(self.sql_expr_to_logical_expr( @@ -249,15 +257,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { None }; - Ok(Expr::Case(Case::new( - expr, - when_expr - .iter() - .zip(then_expr.iter()) - .map(|(w, t)| (Box::new(w.to_owned()), Box::new(t.to_owned()))) - .collect(), - else_expr, - ))) + Ok(Expr::Case(Case::new(expr, when_then_expr, else_expr))) } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 6ddc3455cfc5..d29ccdc6a7e9 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -22,7 +22,7 @@ use datafusion_expr::planner::{ use sqlparser::ast::{ AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType, DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry, - StructField, Subscript, TrimWhereField, Value, + StructField, Subscript, TrimWhereField, Value, ValueWithSpan, }; use datafusion_common::{ @@ -211,7 +211,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { // more context. match sql { SQLExpr::Value(value) => { - self.parse_value(value, planner_context.prepare_param_data_types()) + self.parse_value(value.into(), planner_context.prepare_param_data_types()) } SQLExpr::Extract { field, expr, .. 
} => { let mut extract_args = vec![ @@ -253,12 +253,10 @@ impl SqlToRel<'_, S> { SQLExpr::Case { operand, conditions, - results, else_result, } => self.sql_case_identifier_to_expr( operand, conditions, - results, else_result, schema, planner_context, @@ -292,7 +290,7 @@ impl SqlToRel<'_, S> { } SQLExpr::TypedString { data_type, value } => Ok(Expr::Cast(Cast::new( - Box::new(lit(value)), + Box::new(lit(value.into_string().unwrap())), self.convert_data_type(&data_type)?, ))), @@ -544,9 +542,10 @@ impl SqlToRel<'_, S> { planner_context, )?), match *time_zone { - SQLExpr::Value(Value::SingleQuotedString(s)) => { - DataType::Timestamp(TimeUnit::Nanosecond, Some(s.into())) - } + SQLExpr::Value(ValueWithSpan { + value: Value::SingleQuotedString(s), + span: _, + }) => DataType::Timestamp(TimeUnit::Nanosecond, Some(s.into())), _ => { return not_impl_err!( "Unsupported ast node in sqltorel: {time_zone:?}" @@ -1062,10 +1061,12 @@ impl SqlToRel<'_, S> { Subscript::Index { index } => { // index can be a name, in which case it is a named field access match index { - SQLExpr::Value( - Value::SingleQuotedString(s) - | Value::DoubleQuotedString(s), - ) => Ok(Some(GetFieldAccess::NamedStructField { + SQLExpr::Value(ValueWithSpan { + value: + Value::SingleQuotedString(s) + | Value::DoubleQuotedString(s), + span: _, + }) => Ok(Some(GetFieldAccess::NamedStructField { name: ScalarValue::from(s), })), SQLExpr::JsonAccess { .. } => { @@ -1128,9 +1129,10 @@ impl SqlToRel<'_, S> { } } AccessExpr::Dot(expr) => match expr { - SQLExpr::Value( - Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), - ) => Ok(Some(GetFieldAccess::NamedStructField { + SQLExpr::Value(ValueWithSpan { + value: Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), + span : _ + }) => Ok(Some(GetFieldAccess::NamedStructField { name: ScalarValue::from(s), })), _ => { diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index b7ed04326f40..cce3f3004809 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -21,7 +21,9 @@ use datafusion_common::{ }; use datafusion_expr::expr::Sort; use datafusion_expr::{Expr, SortExpr}; -use sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value}; +use sqlparser::ast::{ + Expr as SQLExpr, OrderByExpr, OrderByOptions, Value, ValueWithSpan, +}; impl SqlToRel<'_, S> { /// Convert sql [OrderByExpr] to `Vec`. 
@@ -62,9 +64,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> { let mut expr_vec = vec![]; for e in exprs { let OrderByExpr { - asc, expr, - nulls_first, + options: OrderByOptions { asc, nulls_first }, with_fill, } = e; @@ -73,7 +74,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> { } let expr = match expr { - SQLExpr::Value(Value::Number(v, _)) if literal_to_column => { + SQLExpr::Value(ValueWithSpan { + value: Value::Number(v, _), + span: _, + }) if literal_to_column => { let field_index = v .parse::<usize>() .map_err(|err| plan_datafusion_err!("{}", err))?; diff --git a/datafusion/sql/src/expr/unary_op.rs b/datafusion/sql/src/expr/unary_op.rs index a4a973a74aa2..626b79d6c3b6 100644 --- a/datafusion/sql/src/expr/unary_op.rs +++ b/datafusion/sql/src/expr/unary_op.rs @@ -21,7 +21,7 @@ use datafusion_expr::{ type_coercion::{is_interval, is_timestamp}, Expr, ExprSchemable, }; -use sqlparser::ast::{Expr as SQLExpr, UnaryOperator, Value}; +use sqlparser::ast::{Expr as SQLExpr, UnaryOperator, Value, ValueWithSpan}; impl<S: ContextProvider> SqlToRel<'_, S> { pub(crate) fn parse_sql_unary_op( @@ -61,9 +61,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> { match expr { // Optimization: if it's a number literal, we apply the negative operator // here directly to calculate the new literal. - SQLExpr::Value(Value::Number(n, _)) => { - self.parse_sql_number(&n, true) - } + SQLExpr::Value(ValueWithSpan { + value: Value::Number(n, _), + span: _, + }) => self.parse_sql_number(&n, true), SQLExpr::Interval(interval) => { self.sql_interval_to_expr(true, interval) } diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index 168348aee222..d53691ef05d1 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -32,7 +32,9 @@ use datafusion_expr::expr::{BinaryExpr, Placeholder}; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{lit, Expr, Operator}; use log::debug; -use sqlparser::ast::{BinaryOperator, Expr as SQLExpr, Interval, UnaryOperator, Value}; +use sqlparser::ast::{ + BinaryOperator, Expr as SQLExpr, Interval, UnaryOperator, Value, ValueWithSpan, +}; use sqlparser::parser::ParserError::ParserError; use std::borrow::Cow; use std::cmp::Ordering; @@ -254,8 +256,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> { fn interval_literal(interval_value: SQLExpr, negative: bool) -> Result<String> { let s = match interval_value { - SQLExpr::Value(Value::SingleQuotedString(s) | Value::DoubleQuotedString(s)) => s, - SQLExpr::Value(Value::Number(ref v, long)) => { + SQLExpr::Value(ValueWithSpan { + value: Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), + span: _, + }) => s, + SQLExpr::Value(ValueWithSpan { + value: Value::Number(ref v, long), + span: _, + }) => { if long { return not_impl_err!( "Unsupported interval argument. 
Long number not supported: {interval_value:?}" diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index d320b4acd0e8..822b651eae86 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -23,7 +23,7 @@ use std::collections::VecDeque; use std::fmt; -use sqlparser::ast::ExprWithAlias; +use sqlparser::ast::{ExprWithAlias, OrderByOptions}; use sqlparser::tokenizer::TokenWithSpan; use sqlparser::{ ast::{ @@ -745,8 +745,7 @@ impl<'a> DFParser<'a> { Ok(OrderByExpr { expr, - asc, - nulls_first, + options: OrderByOptions { asc, nulls_first }, with_fill: None, }) } @@ -793,11 +792,6 @@ impl<'a> DFParser<'a> { fn parse_column_def(&mut self) -> Result { let name = self.parser.parse_identifier()?; let data_type = self.parser.parse_data_type()?; - let collation = if self.parser.parse_keyword(Keyword::COLLATE) { - Some(self.parser.parse_object_name(false)?) - } else { - None - }; let mut options = vec![]; loop { if self.parser.parse_keyword(Keyword::CONSTRAINT) { @@ -819,7 +813,6 @@ impl<'a> DFParser<'a> { Ok(ColumnDef { name, data_type, - collation, options, }) } @@ -1043,7 +1036,6 @@ mod tests { span: Span::empty(), }, data_type, - collation: None, options: vec![], } } @@ -1053,7 +1045,7 @@ mod tests { // positive case let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'"; let display = None; - let name = ObjectName(vec![Ident::from("t")]); + let name = ObjectName::from(vec![Ident::from("t")]); let expected = Statement::CreateExternalTable(CreateExternalTable { name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], @@ -1351,8 +1343,7 @@ mod tests { quote_style: None, span: Span::empty(), }), - asc, - nulls_first, + options: OrderByOptions { asc, nulls_first }, with_fill: None, }]], if_not_exists: false, @@ -1383,8 +1374,10 @@ mod tests { quote_style: None, span: Span::empty(), }), - asc: Some(true), - nulls_first: None, + options: OrderByOptions { + asc: Some(true), + nulls_first: None, + }, with_fill: None, }, OrderByExpr { @@ -1393,8 +1386,10 @@ mod tests { quote_style: None, span: Span::empty(), }), - asc: Some(false), - nulls_first: Some(true), + options: OrderByOptions { + asc: Some(false), + nulls_first: Some(true), + }, with_fill: None, }, ]], @@ -1432,8 +1427,10 @@ mod tests { span: Span::empty(), })), }, - asc: Some(true), - nulls_first: None, + options: OrderByOptions { + asc: Some(true), + nulls_first: None, + }, with_fill: None, }]], if_not_exists: false, @@ -1479,8 +1476,10 @@ mod tests { span: Span::empty(), })), }, - asc: Some(true), - nulls_first: None, + options: OrderByOptions { + asc: Some(true), + nulls_first: None, + }, with_fill: None, }]], if_not_exists: true, @@ -1694,7 +1693,7 @@ mod tests { // For error cases, see: `copy.slt` fn object_name(name: &str) -> CopyToSource { - CopyToSource::Relation(ObjectName(vec![Ident::new(name)])) + CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)])) } // Based on sqlparser-rs diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index daaf70f95304..180017ee9c19 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -23,20 +23,18 @@ use std::vec; use arrow::datatypes::*; use datafusion_common::config::SqlParserOptions; use datafusion_common::error::add_possible_columns_to_diag; +use datafusion_common::TableReference; use datafusion_common::{ field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, Diagnostic, SchemaError, }; -use sqlparser::ast::TimezoneInfo; -use 
sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo}; -use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; -use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; - -use datafusion_common::TableReference; use datafusion_common::{not_impl_err, plan_err, DFSchema, DataFusionError, Result}; use datafusion_expr::logical_plan::{LogicalPlan, LogicalPlanBuilder}; use datafusion_expr::utils::find_column_exprs; use datafusion_expr::{col, Expr}; +use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo, TimezoneInfo}; +use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; +use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; use crate::utils::make_decimal_type; pub use datafusion_expr::planner::ContextProvider; @@ -179,16 +177,17 @@ impl IdentNormalizer { } } -/// Struct to store the states used by the Planner. The Planner will leverage the states to resolve -/// CTEs, Views, subqueries and PREPARE statements. The states include +/// Struct to store the states used by the Planner. The Planner will leverage the states +/// to resolve CTEs, Views, subqueries and PREPARE statements. The states include /// Common Table Expression (CTE) provided with WITH clause and /// Parameter Data Types provided with PREPARE statement and the query schema of the /// outer query plan. /// /// # Cloning /// -/// Only the `ctes` are truly cloned when the `PlannerContext` is cloned. This helps resolve -/// scoping issues of CTEs. By using cloning, a subquery can inherit CTEs from the outer query +/// Only the `ctes` are truly cloned when the `PlannerContext` is cloned. +/// This helps resolve scoping issues of CTEs. +/// By using cloning, a subquery can inherit CTEs from the outer query /// and can also define its own private CTEs without affecting the outer query. /// #[derive(Debug, Clone)] @@ -331,7 +330,8 @@ impl PlannerContext { /// by subsequent passes. /// /// Key interfaces are: -/// * [`Self::sql_statement_to_plan`]: Convert a statement (e.g. `SELECT ...`) into a [`LogicalPlan`] +/// * [`Self::sql_statement_to_plan`]: Convert a statement +/// (e.g. `SELECT ...`) into a [`LogicalPlan`] /// * [`Self::sql_to_expr`]: Convert an expression (e.g. 
`1 + 2`) into an [`Expr`] pub struct SqlToRel<'a, S: ContextProvider> { pub(crate) context_provider: &'a S, @@ -444,7 +444,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Ok(plan) } else if idents.len() != plan.schema().fields().len() { plan_err!( - "Source table contains {} columns but only {} names given as column alias", + "Source table contains {} columns but only {} \ + names given as column alias", plan.schema().fields().len(), idents.len() ) @@ -558,16 +559,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Boolean | SQLDataType::Bool => Ok(DataType::Boolean), SQLDataType::TinyInt(_) => Ok(DataType::Int8), SQLDataType::SmallInt(_) | SQLDataType::Int2(_) => Ok(DataType::Int16), - SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => Ok(DataType::Int32), + SQLDataType::Int(_) | SQLDataType::Integer(_) | SQLDataType::Int4(_) => { + Ok(DataType::Int32) + } SQLDataType::BigInt(_) | SQLDataType::Int8(_) => Ok(DataType::Int64), - SQLDataType::UnsignedTinyInt(_) => Ok(DataType::UInt8), - SQLDataType::UnsignedSmallInt(_) | SQLDataType::UnsignedInt2(_) => Ok(DataType::UInt16), - SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) | SQLDataType::UnsignedInt4(_) => { - Ok(DataType::UInt32) + SQLDataType::TinyIntUnsigned(_) => Ok(DataType::UInt8), + SQLDataType::SmallIntUnsigned(_) | SQLDataType::Int2Unsigned(_) => { + Ok(DataType::UInt16) } + SQLDataType::IntUnsigned(_) + | SQLDataType::IntegerUnsigned(_) + | SQLDataType::Int4Unsigned(_) => Ok(DataType::UInt32), SQLDataType::Varchar(length) => { match (length, self.options.support_varchar_with_length) { - (Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"), + (Some(_), false) => plan_err!( + "does not support Varchar with length, \ + please set `support_varchar_with_length` to be true" + ), _ => { if self.options.map_varchar_to_utf8view { Ok(DataType::Utf8View) @@ -577,18 +585,27 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } } - SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64), + SQLDataType::BigIntUnsigned(_) | SQLDataType::Int8Unsigned(_) => { + Ok(DataType::UInt64) + } SQLDataType::Float(_) => Ok(DataType::Float32), SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32), - SQLDataType::Double(ExactNumberInfo::None) | SQLDataType::DoublePrecision | SQLDataType::Float8 => Ok(DataType::Float64), - SQLDataType::Double(ExactNumberInfo::Precision(_)|ExactNumberInfo::PrecisionAndScale(_, _)) => { - not_impl_err!("Unsupported SQL type (precision/scale not supported) {sql_type}") + SQLDataType::Double(ExactNumberInfo::None) + | SQLDataType::DoublePrecision + | SQLDataType::Float8 => Ok(DataType::Float64), + SQLDataType::Double( + ExactNumberInfo::Precision(_) | ExactNumberInfo::PrecisionAndScale(_, _), + ) => { + not_impl_err!( + "Unsupported SQL type (precision/scale not supported) {sql_type}" + ) + } + SQLDataType::Char(_) | SQLDataType::Text | SQLDataType::String(_) => { + Ok(DataType::Utf8) } - SQLDataType::Char(_) - | SQLDataType::Text - | SQLDataType::String(_) => Ok(DataType::Utf8), SQLDataType::Timestamp(precision, tz_info) - if precision.is_none() || [0, 3, 6, 9].contains(&precision.unwrap()) => { + if precision.is_none() || [0, 3, 6, 9].contains(&precision.unwrap()) => + { let tz = if matches!(tz_info, TimezoneInfo::Tz) || matches!(tz_info, TimezoneInfo::WithTimeZone) { @@ -617,9 +634,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { 
Ok(DataType::Time64(TimeUnit::Nanosecond)) } else { // We don't support TIMETZ and TIME WITH TIME ZONE for now - not_impl_err!( - "Unsupported SQL type {sql_type:?}" - ) + not_impl_err!("Unsupported SQL type {sql_type:?}") } } SQLDataType::Numeric(exact_number_info) @@ -641,9 +656,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .enumerate() .map(|(idx, field)| { let data_type = self.convert_data_type(&field.field_type)?; - let field_name = match &field.field_name{ + let field_name = match &field.field_name { Some(ident) => ident.clone(), - None => Ident::new(format!("c{idx}")) + None => Ident::new(format!("c{idx}")), }; Ok(Arc::new(Field::new( self.ident_normalizer.normalize(field_name), @@ -654,9 +669,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { .collect::>>()?; Ok(DataType::Struct(Fields::from(fields))) } - // Explicitly list all other types so that if sqlparser - // adds/changes the `SQLDataType` the compiler will tell us on upgrade - // and avoid bugs like https://github.com/apache/datafusion/issues/3059 SQLDataType::Nvarchar(_) | SQLDataType::JSON | SQLDataType::Uuid @@ -670,15 +682,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Enum(_, _) | SQLDataType::Set(_) | SQLDataType::MediumInt(_) - | SQLDataType::UnsignedMediumInt(_) + | SQLDataType::MediumIntUnsigned(_) | SQLDataType::Character(_) | SQLDataType::CharacterVarying(_) | SQLDataType::CharVarying(_) | SQLDataType::CharacterLargeObject(_) | SQLDataType::CharLargeObject(_) - // Unsupported precision | SQLDataType::Timestamp(_, _) - // Precision is not supported | SQLDataType::Time(Some(_), _) | SQLDataType::Dec(_) | SQLDataType::BigNumeric(_) @@ -689,7 +699,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Float64 | SQLDataType::JSONB | SQLDataType::Unspecified - // Clickhouse datatypes | SQLDataType::Int16 | SQLDataType::Int32 | SQLDataType::Int128 @@ -711,7 +720,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Nullable(_) | SQLDataType::LowCardinality(_) | SQLDataType::Trigger - // MySQL datatypes | SQLDataType::TinyBlob | SQLDataType::MediumBlob | SQLDataType::LongBlob @@ -720,11 +728,16 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::LongText | SQLDataType::Bit(_) | SQLDataType::BitVarying(_) - // BigQuery UDFs + | SQLDataType::Signed + | SQLDataType::SignedInteger + | SQLDataType::Unsigned + | SQLDataType::UnsignedInteger | SQLDataType::AnyType - => not_impl_err!( - "Unsupported SQL type {sql_type:?}" - ), + | SQLDataType::Table(_) + | SQLDataType::VarBit(_) + | SQLDataType::GeometricType(_) => { + not_impl_err!("Unsupported SQL type {sql_type:?}") + } } } @@ -754,7 +767,18 @@ pub fn object_name_to_table_reference( enable_normalization: bool, ) -> Result { // Use destructure to make it clear no fields on ObjectName are ignored - let ObjectName(idents) = object_name; + let ObjectName(object_name_parts) = object_name; + let idents = object_name_parts + .into_iter() + .map(|object_name_part| { + object_name_part.as_ident().cloned().ok_or_else(|| { + plan_datafusion_err!( + "Expected identifier, but found: {:?}", + object_name_part + ) + }) + }) + .collect::>>()?; idents_to_table_reference(idents, enable_normalization) } @@ -836,7 +860,7 @@ pub(crate) fn idents_to_table_reference( pub fn object_name_to_qualifier( sql_table_name: &ObjectName, enable_normalization: bool, -) -> String { +) -> Result { let columns = vec!["table_name", "table_schema", "table_catalog"].into_iter(); let normalizer = IdentNormalizer::new(enable_normalization); 
sql_table_name @@ -844,13 +868,23 @@ .iter() .rev() .zip(columns) - .map(|(ident, column_name)| { - format!( - r#"{} = '{}'"#, - column_name, - normalizer.normalize(ident.clone()) - ) + .map(|(object_name_part, column_name)| { + object_name_part + .as_ident() + .map(|ident| { + format!( + r#"{} = '{}'"#, + column_name, + normalizer.normalize(ident.clone()) + ) + }) + .ok_or_else(|| { + plan_datafusion_err!( + "Expected identifier, but found: {:?}", + object_name_part + ) + }) }) - .collect::<Vec<_>>() - .join(" AND ") + .collect::<Result<Vec<String>>>() + .map(|parts| parts.join(" AND ")) } diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs index 9d5a54d90b2c..ea641320c01b 100644 --- a/datafusion/sql/src/query.rs +++ b/datafusion/sql/src/query.rs @@ -22,12 +22,13 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use crate::stack::StackGuard; use datafusion_common::{not_impl_err, Constraints, DFSchema, Result}; use datafusion_expr::expr::Sort; +use datafusion_expr::select_expr::SelectExpr; use datafusion_expr::{ CreateMemoryTable, DdlStatement, Distinct, LogicalPlan, LogicalPlanBuilder, }; use sqlparser::ast::{ - Expr as SQLExpr, Offset as SQLOffset, OrderBy, OrderByExpr, Query, SelectInto, - SetExpr, + Expr as SQLExpr, Offset as SQLOffset, OrderBy, OrderByExpr, OrderByKind, Query, + SelectInto, SetExpr, }; impl<S: ContextProvider> SqlToRel<'_, S> { @@ -50,10 +51,8 @@ match set_expr { SetExpr::Select(mut select) => { let select_into = select.into.take(); - // Order-by expressions may refer to columns in the `FROM` clause, - // so we need to process `SELECT` and `ORDER BY` together. - let oby_exprs = to_order_by_exprs(query.order_by)?; - let plan = self.select_to_plan(*select, oby_exprs, planner_context)?; + let plan = + self.select_to_plan(*select, query.order_by, planner_context)?; let plan = self.limit(plan, query.offset, query.limit, planner_context)?; // Process the `SELECT INTO` after `LIMIT`. @@ -153,12 +152,23 @@ /// Returns the order by expressions from the query. fn to_order_by_exprs(order_by: Option<OrderBy>) -> Result<Vec<OrderByExpr>> { - let Some(OrderBy { exprs, interpolate }) = order_by else { + to_order_by_exprs_with_select(order_by, None) +} + +/// Returns the order by expressions from the query with the select expressions. +pub(crate) fn to_order_by_exprs_with_select( + order_by: Option<OrderBy>, + _select_exprs: Option<&Vec<SelectExpr>>, // TODO: ORDER BY ALL +) -> Result<Vec<OrderByExpr>> { + let Some(OrderBy { kind, interpolate }) = order_by else { // If no order by, return an empty array. return Ok(vec![]); }; if let Some(_interpolate) = interpolate { return not_impl_err!("ORDER BY INTERPOLATE is not supported"); } - Ok(exprs) + match kind { + OrderByKind::All(_) => not_impl_err!("ORDER BY ALL is not supported"), + OrderByKind::Expressions(order_by_exprs) => Ok(order_by_exprs), + } } diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index 88665401dc31..8a3c20e3971b 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -16,7 +16,7 @@ // under the License. 
use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, Column, Result}; +use datafusion_common::{not_impl_err, plan_datafusion_err, Column, Result}; use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{ Join, JoinConstraint, JoinOperator, ObjectName, TableFactor, TableWithJoins, @@ -55,13 +55,13 @@ impl SqlToRel<'_, S> { self.create_relation(join.relation, planner_context)? }; match join.join_operator { - JoinOperator::LeftOuter(constraint) => { + JoinOperator::LeftOuter(constraint) | JoinOperator::Left(constraint) => { self.parse_join(left, right, constraint, JoinType::Left, planner_context) } - JoinOperator::RightOuter(constraint) => { + JoinOperator::RightOuter(constraint) | JoinOperator::Right(constraint) => { self.parse_join(left, right, constraint, JoinType::Right, planner_context) } - JoinOperator::Inner(constraint) => { + JoinOperator::Inner(constraint) | JoinOperator::Join(constraint) => { self.parse_join(left, right, constraint, JoinType::Inner, planner_context) } JoinOperator::LeftSemi(constraint) => self.parse_join( @@ -136,7 +136,13 @@ impl SqlToRel<'_, S> { ) } else { let id = object_names.swap_remove(0); - Ok(self.ident_normalizer.normalize(id)) + id.as_ident() + .ok_or_else(|| { + plan_datafusion_err!( + "Expected identifier in USING clause" + ) + }) + .map(|ident| self.ident_normalizer.normalize(ident.clone())) } }) .collect::>>()?; @@ -186,6 +192,7 @@ pub(crate) fn is_lateral_join(join: &Join) -> Result { let is_lateral_syntax = is_lateral(&join.relation); let is_apply_syntax = match join.join_operator { JoinOperator::FullOuter(..) + | JoinOperator::Right(..) | JoinOperator::RightOuter(..) | JoinOperator::RightAnti(..) | JoinOperator::RightSemi(..) diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 8078261d9152..dee855f8c000 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -43,7 +43,8 @@ impl SqlToRel<'_, S> { name, alias, args, .. 
} => { if let Some(func_args) = args { - let tbl_func_name = name.0.first().unwrap().value.to_string(); + let tbl_func_name = + name.0.first().unwrap().as_ident().unwrap().to_string(); let args = func_args .args .into_iter() diff --git a/datafusion/sql/src/resolve.rs b/datafusion/sql/src/resolve.rs index 88416dfe0324..96012a92c09a 100644 --- a/datafusion/sql/src/resolve.rs +++ b/datafusion/sql/src/resolve.rs @@ -81,7 +81,7 @@ impl Visitor for RelationVisitor { cte.visit(self); } self.ctes_in_scope - .push(ObjectName(vec![cte.alias.name.clone()])); + .push(ObjectName::from(vec![cte.alias.name.clone()])); } } ControlFlow::Continue(()) @@ -120,7 +120,7 @@ impl Visitor for RelationVisitor { ); if requires_information_schema { for s in INFORMATION_SCHEMA_TABLES { - self.relations.insert(ObjectName(vec![ + self.relations.insert(ObjectName::from(vec![ Ident::new(INFORMATION_SCHEMA), Ident::new(*s), ])); diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index e9cf4ce48a77..2a2d0b3b3eb8 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -19,6 +19,7 @@ use std::collections::HashSet; use std::sync::Arc; use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use crate::query::to_order_by_exprs_with_select; use crate::utils::{ check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs, rewrite_recursive_unnests_bottom_up, @@ -44,8 +45,8 @@ use datafusion_expr::{ use indexmap::IndexMap; use sqlparser::ast::{ - Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderByExpr, - WildcardAdditionalOptions, WindowType, + Distinct, Expr as SQLExpr, GroupByExpr, NamedWindowExpr, OrderBy, + SelectItemQualifiedWildcardKind, WildcardAdditionalOptions, WindowType, }; use sqlparser::ast::{NamedWindowDefinition, Select, SelectItem, TableWithJoins}; @@ -54,7 +55,7 @@ impl SqlToRel<'_, S> { pub(super) fn select_to_plan( &self, mut select: Select, - order_by: Vec, + query_order_by: Option, planner_context: &mut PlannerContext, ) -> Result { // Check for unsupported syntax first @@ -92,6 +93,9 @@ impl SqlToRel<'_, S> { planner_context, )?; + let order_by = + to_order_by_exprs_with_select(query_order_by, Some(&select_exprs))?; + // Having and group by clause may reference aliases defined in select projection let projected_plan = self.project(base_plan.clone(), select_exprs)?; let select_exprs = projected_plan.expressions(); @@ -643,6 +647,16 @@ impl SqlToRel<'_, S> { } SelectItem::QualifiedWildcard(object_name, options) => { Self::check_wildcard_options(&options)?; + let object_name = match object_name { + SelectItemQualifiedWildcardKind::ObjectName(object_name) => { + object_name + } + SelectItemQualifiedWildcardKind::Expr(_) => { + return plan_err!( + "Qualified wildcard with expression not supported" + ) + } + }; let qualifier = self.object_name_to_table_reference(object_name)?; let planned_options = self.plan_wildcard_options( plan, diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index a55b3b039087..272d6f874b4d 100644 --- a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -31,7 +31,7 @@ impl SqlToRel<'_, S> { ) -> Result { let set_expr_span = Span::try_from_sqlparser_span(set_expr.span()); match set_expr { - SetExpr::Select(s) => self.select_to_plan(*s, vec![], planner_context), + SetExpr::Select(s) => self.select_to_plan(*s, None, planner_context), SetExpr::Values(v) => self.sql_values_to_plan(v, planner_context), 
SetExpr::SetOperation { op, diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 1d0dd2390f09..fc6cb0d32fef 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -57,6 +57,7 @@ use datafusion_expr::{ use sqlparser::ast::{ self, BeginTransactionKind, NullsDistinctOption, ShowStatementIn, ShowStatementOptions, SqliteOnConflict, TableObject, UpdateTableFromKind, + ValueWithSpan, }; use sqlparser::ast::{ Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, @@ -75,7 +76,13 @@ fn object_name_to_string(object_name: &ObjectName) -> String { object_name .0 .iter() - .map(ident_to_string) + .map(|object_name_part| { + object_name_part + .as_ident() + // TODO: It might be better to return an error + // than to silently use a default value. + .map_or_else(String::new, ident_to_string) + }) .collect::>() .join(".") } @@ -160,7 +167,8 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec {} + | ast::ColumnOption::Alias(_) + | ast::ColumnOption::Collation(_) => {} } } } @@ -276,6 +284,12 @@ impl SqlToRel<'_, S> { with_aggregation_policy, with_row_access_policy, with_tags, + iceberg, + external_volume, + base_location, + catalog, + catalog_sync, + storage_serialization_policy, }) if table_properties.is_empty() && with_options.is_empty() => { if temporary { return not_impl_err!("Temporary tables not supported")?; @@ -396,6 +410,24 @@ impl SqlToRel<'_, S> { if with_tags.is_some() { return not_impl_err!("With tags not supported")?; } + if iceberg { + return not_impl_err!("Iceberg not supported")?; + } + if external_volume.is_some() { + return not_impl_err!("External volume not supported")?; + } + if base_location.is_some() { + return not_impl_err!("Base location not supported")?; + } + if catalog.is_some() { + return not_impl_err!("Catalog not supported")?; + } + if catalog_sync.is_some() { + return not_impl_err!("Catalog sync not supported")?; + } + if storage_serialization_policy.is_some() { + return not_impl_err!("Storage serialization policy not supported")?; + } // Merge inline constraints and existing constraints let mut all_constraints = constraints; @@ -691,6 +723,8 @@ impl SqlToRel<'_, S> { // has_parentheses specifies the syntax, but the plan is the // same no matter the syntax used, so ignore it has_parentheses: _, + immediate, + into, } => { // `USING` is a MySQL-specific syntax and currently not supported. 
if !using.is_empty() { @@ -698,7 +732,14 @@ impl SqlToRel<'_, S> { "Execute statement with USING is not supported" ); } - + if immediate { + return not_impl_err!( + "Execute statement with IMMEDIATE is not supported" + ); + } + if !into.is_empty() { + return not_impl_err!("Execute statement with INTO is not supported"); + } let empty_schema = DFSchema::empty(); let parameters = parameters .into_iter() @@ -706,7 +747,7 @@ impl SqlToRel<'_, S> { .collect::>>()?; Ok(LogicalPlan::Statement(PlanStatement::Execute(Execute { - name: object_name_to_string(&name), + name: object_name_to_string(&name.unwrap()), parameters, }))) } @@ -907,18 +948,23 @@ impl SqlToRel<'_, S> { returning, or, } => { - let from = + let froms = from.map(|update_table_from_kind| match update_table_from_kind { - UpdateTableFromKind::BeforeSet(from) => from, - UpdateTableFromKind::AfterSet(from) => from, + UpdateTableFromKind::BeforeSet(froms) => froms, + UpdateTableFromKind::AfterSet(froms) => froms, }); + // TODO: support multiple tables in UPDATE SET FROM + if froms.as_ref().is_some_and(|f| f.len() > 1) { + plan_err!("Multiple tables in UPDATE SET FROM not yet supported")?; + } + let update_from = froms.and_then(|mut f| f.pop()); if returning.is_some() { plan_err!("Update-returning clause not yet supported")?; } if or.is_some() { plan_err!("ON conflict not supported")?; } - self.update_to_plan(table, assignments, from, selection) + self.update_to_plan(table, assignments, update_from, selection) } Statement::Delete(Delete { @@ -959,12 +1005,28 @@ impl SqlToRel<'_, S> { begin: false, modifier, transaction, + statements, + exception_statements, + has_end_keyword, } => { if let Some(modifier) = modifier { return not_impl_err!( "Transaction modifier not supported: {modifier}" ); } + if !statements.is_empty() { + return not_impl_err!( + "Transaction with multiple statements not supported" + ); + } + if exception_statements.is_some() { + return not_impl_err!( + "Transaction with exception statements not supported" + ); + } + if has_end_keyword { + return not_impl_err!("Transaction with END keyword not supported"); + } self.validate_transaction_kind(transaction)?; let isolation_level: ast::TransactionIsolationLevel = modes .iter() @@ -1089,7 +1151,7 @@ impl SqlToRel<'_, S> { // At the moment functions can't be qualified `schema.name` let name = match &name.0[..] { [] => exec_err!("Function should have name")?, - [n] => n.value.clone(), + [n] => n.as_ident().unwrap().value.clone(), [..] => not_impl_err!("Qualified functions are not supported")?, }; // @@ -1147,7 +1209,7 @@ impl SqlToRel<'_, S> { // At the moment functions can't be qualified `schema.name` let name = match &desc.name.0[..] { [] => exec_err!("Function should have name")?, - [n] => n.value.clone(), + [n] => n.as_ident().unwrap().value.clone(), [..] 
=> not_impl_err!("Qualified functions are not supported")?, }; let statement = DdlStatement::DropFunction(DropFunction { @@ -1348,8 +1410,9 @@ impl SqlToRel<'_, S> { planner_context, ) .unwrap(); - let asc = order_by_expr.asc.unwrap_or(true); - let nulls_first = order_by_expr.nulls_first.unwrap_or(!asc); + let asc = order_by_expr.options.asc.unwrap_or(true); + let nulls_first = + order_by_expr.options.nulls_first.unwrap_or(!asc); SortExpr::new(ordered_expr, asc, nulls_first) }) @@ -1633,7 +1696,7 @@ impl SqlToRel<'_, S> { variable_vec = variable_vec.split_at(variable_vec.len() - 1).0.to_vec(); } - let variable = object_name_to_string(&ObjectName(variable_vec)); + let variable = object_name_to_string(&ObjectName::from(variable_vec)); let base_query = format!("SELECT {columns} FROM information_schema.df_settings"); let query = if variable == "all" { // Add an ORDER BY so the output comes out in a consistent order @@ -1700,7 +1763,7 @@ impl SqlToRel<'_, S> { // Parse value string from Expr let value_string = match &value[0] { SQLExpr::Identifier(i) => ident_to_string(i), - SQLExpr::Value(v) => match crate::utils::value_to_string(v) { + SQLExpr::Value(v) => match crate::utils::value_to_string(&v.value) { None => { return plan_err!("Unsupported Value {}", value[0]); } @@ -1800,7 +1863,9 @@ impl SqlToRel<'_, S> { .0 .iter() .last() - .ok_or_else(|| plan_datafusion_err!("Empty column id"))?; + .ok_or_else(|| plan_datafusion_err!("Empty column id"))? + .as_ident() + .unwrap(); // Validate that the assignment target column exists table_schema.field_with_unqualified_name(&col_name.value)?; Ok((col_name.value.clone(), assign.value.clone())) @@ -1938,7 +2003,11 @@ impl SqlToRel<'_, S> { if let SetExpr::Values(ast::Values { rows, .. }) = (*source.body).clone() { for row in rows.iter() { for (idx, val) in row.iter().enumerate() { - if let SQLExpr::Value(Value::Placeholder(name)) = val { + if let SQLExpr::Value(ValueWithSpan { + value: Value::Placeholder(name), + span: _, + }) = val + { let name = name.replace('$', "").parse::().map_err(|_| { plan_datafusion_err!("Can't parse placeholder: {name}") @@ -2019,7 +2088,7 @@ impl SqlToRel<'_, S> { let where_clause = object_name_to_qualifier( &sql_table_name, self.options.enable_ident_normalization, - ); + )?; if !self.has_table("information_schema", "columns") { return plan_err!( @@ -2144,7 +2213,7 @@ ON p.function_name = r.routine_name let where_clause = object_name_to_qualifier( &sql_table_name, self.options.enable_ident_normalization, - ); + )?; // Do a table lookup to verify the table exists let table_ref = self.object_name_to_table_reference(sql_table_name)?; diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index 211ae84a0043..6fcc203637cc 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -19,13 +19,13 @@ use core::fmt; use std::ops::ControlFlow; use sqlparser::ast::helpers::attached_token::AttachedToken; -use sqlparser::ast::{self, visit_expressions_mut}; +use sqlparser::ast::{self, visit_expressions_mut, OrderByKind, SelectFlavor}; #[derive(Clone)] pub struct QueryBuilder { with: Option, body: Option>, - order_by: Vec, + order_by_kind: Option, limit: Option, limit_by: Vec, offset: Option, @@ -47,8 +47,8 @@ impl QueryBuilder { pub fn take_body(&mut self) -> Option> { self.body.take() } - pub fn order_by(&mut self, value: Vec) -> &mut Self { - self.order_by = value; + pub fn order_by(&mut self, value: OrderByKind) -> &mut Self { + self.order_by_kind = Some(value); self } pub fn 
limit(&mut self, value: Option) -> &mut Self { @@ -76,14 +76,13 @@ impl QueryBuilder { self } pub fn build(&self) -> Result { - let order_by = if self.order_by.is_empty() { - None - } else { - Some(ast::OrderBy { - exprs: self.order_by.clone(), + let order_by = self + .order_by_kind + .as_ref() + .map(|order_by_kind| ast::OrderBy { + kind: order_by_kind.clone(), interpolate: None, - }) - }; + }); Ok(ast::Query { with: self.with.clone(), @@ -106,7 +105,7 @@ impl QueryBuilder { Self { with: Default::default(), body: Default::default(), - order_by: Default::default(), + order_by_kind: Default::default(), limit: Default::default(), limit_by: Default::default(), offset: Default::default(), @@ -139,6 +138,7 @@ pub struct SelectBuilder { named_window: Vec, qualify: Option, value_table_mode: Option, + flavor: Option, } #[allow(dead_code)] @@ -296,6 +296,10 @@ impl SelectBuilder { window_before_qualify: false, prewhere: None, select_token: AttachedToken::empty(), + flavor: match self.flavor { + Some(ref value) => value.clone(), + None => return Err(Into::into(UninitializedFieldError::from("flavor"))), + }, }) } fn create_empty() -> Self { @@ -315,6 +319,7 @@ impl SelectBuilder { named_window: Default::default(), qualify: Default::default(), value_table_mode: Default::default(), + flavor: Some(SelectFlavor::Standard), } } } @@ -454,6 +459,7 @@ pub struct TableRelationBuilder { with_hints: Vec, version: Option, partitions: Vec, + index_hints: Vec, } #[allow(dead_code)] @@ -482,6 +488,10 @@ impl TableRelationBuilder { self.partitions = value; self } + pub fn index_hints(&mut self, value: Vec) -> &mut Self { + self.index_hints = value; + self + } pub fn build(&self) -> Result { Ok(ast::TableFactor::Table { name: match self.name { @@ -499,6 +509,7 @@ impl TableRelationBuilder { with_ordinality: false, json_path: None, sample: None, + index_hints: self.index_hints.clone(), }) } fn create_empty() -> Self { @@ -509,6 +520,7 @@ impl TableRelationBuilder { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + index_hints: Default::default(), } } } diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 77d58de7920a..05914b98f55f 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -313,7 +313,7 @@ impl PostgreSqlDialect { } Ok(ast::Expr::Function(Function { - name: ObjectName(vec![Ident { + name: ObjectName::from(vec![Ident { value: func_name.to_string(), quote_style: None, span: Span::empty(), @@ -421,11 +421,11 @@ impl Dialect for MySqlDialect { } fn int64_cast_dtype(&self) -> ast::DataType { - ast::DataType::Custom(ObjectName(vec![Ident::new("SIGNED")]), vec![]) + ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![]) } fn int32_cast_dtype(&self) -> ast::DataType { - ast::DataType::Custom(ObjectName(vec![Ident::new("SIGNED")]), vec![]) + ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![]) } fn timestamp_cast_dtype( diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 5e74849cd998..48360377586e 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -18,8 +18,8 @@ use datafusion_expr::expr::{AggregateFunctionParams, Unnest, WindowFunctionParams}; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ - self, Array, BinaryOperator, Expr as AstExpr, Function, Ident, Interval, ObjectName, - Subscript, TimezoneInfo, UnaryOperator, + self, Array, 
BinaryOperator, CaseWhen, Expr as AstExpr, Function, Ident, Interval, + ObjectName, OrderByOptions, Subscript, TimezoneInfo, UnaryOperator, ValueWithSpan, }; use std::sync::Arc; use std::vec; @@ -155,12 +155,14 @@ impl Unparser<'_> { }) => { let conditions = when_then_expr .iter() - .map(|(w, _)| self.expr_to_sql_inner(w)) - .collect::>>()?; - let results = when_then_expr - .iter() - .map(|(_, t)| self.expr_to_sql_inner(t)) - .collect::>>()?; + .map(|(cond, result)| { + Ok(CaseWhen { + condition: self.expr_to_sql_inner(cond)?, + result: self.expr_to_sql_inner(result)?, + }) + }) + .collect::>>()?; + let operand = match expr.as_ref() { Some(e) => match self.expr_to_sql_inner(e) { Ok(sql_expr) => Some(Box::new(sql_expr)), @@ -179,7 +181,6 @@ impl Unparser<'_> { Ok(ast::Expr::Case { operand, conditions, - results, else_result, }) } @@ -248,7 +249,7 @@ impl Unparser<'_> { })); Ok(ast::Expr::Function(Function { - name: ObjectName(vec![Ident { + name: ObjectName::from(vec![Ident { value: func_name.to_string(), quote_style: None, span: Span::empty(), @@ -301,7 +302,7 @@ impl Unparser<'_> { None => None, }; Ok(ast::Expr::Function(Function { - name: ObjectName(vec![Ident { + name: ObjectName::from(vec![Ident { value: func_name.to_string(), quote_style: None, span: Span::empty(), @@ -437,7 +438,7 @@ impl Unparser<'_> { let idents: Vec = qualifier.to_vec().into_iter().map(Ident::new).collect(); Ok(ast::Expr::QualifiedWildcard( - ObjectName(idents), + ObjectName::from(idents), attached_token, )) } else { @@ -479,7 +480,7 @@ impl Unparser<'_> { } }, Expr::Placeholder(p) => { - Ok(ast::Expr::Value(ast::Value::Placeholder(p.id.to_string()))) + Ok(ast::Expr::value(ast::Value::Placeholder(p.id.to_string()))) } Expr::OuterReferenceColumn(_, col) => self.col_to_sql(col), Expr::Unnest(unnest) => self.unnest_to_sql(unnest), @@ -509,7 +510,7 @@ impl Unparser<'_> { ) -> Result { let args = self.function_args_to_sql(args)?; Ok(ast::Expr::Function(Function { - name: ObjectName(vec![Ident { + name: ObjectName::from(vec![Ident { value: func_name.to_string(), quote_style: None, span: Span::empty(), @@ -661,8 +662,10 @@ impl Unparser<'_> { Ok(ast::OrderByExpr { expr: sql_parser_expr, - asc: Some(*asc), - nulls_first, + options: OrderByOptions { + asc: Some(*asc), + nulls_first, + }, with_fill: None, }) } @@ -702,7 +705,11 @@ impl Unparser<'_> { datafusion_expr::window_frame::WindowFrameBound::Preceding(val) => { Ok(ast::WindowFrameBound::Preceding({ let val = self.scalar_to_sql(val)?; - if let ast::Expr::Value(ast::Value::Null) = &val { + if let ast::Expr::Value(ValueWithSpan { + value: ast::Value::Null, + span: _, + }) = &val + { None } else { Some(Box::new(val)) @@ -712,7 +719,11 @@ impl Unparser<'_> { datafusion_expr::window_frame::WindowFrameBound::Following(val) => { Ok(ast::WindowFrameBound::Following({ let val = self.scalar_to_sql(val)?; - if let ast::Expr::Value(ast::Value::Null) = &val { + if let ast::Expr::Value(ValueWithSpan { + value: ast::Value::Null, + span: _, + }) = &val + { None } else { Some(Box::new(val)) @@ -1000,7 +1011,7 @@ impl Unparser<'_> { Ok(ast::Expr::Cast { kind: ast::CastKind::Cast, - expr: Box::new(ast::Expr::Value(SingleQuotedString(ts))), + expr: Box::new(ast::Expr::value(SingleQuotedString(ts))), data_type: self.dialect.timestamp_cast_dtype(&time_unit, &None), format: None, }) @@ -1022,7 +1033,7 @@ impl Unparser<'_> { .to_string(); Ok(ast::Expr::Cast { kind: ast::CastKind::Cast, - expr: Box::new(ast::Expr::Value(SingleQuotedString(time))), + expr: 
Box::new(ast::Expr::value(SingleQuotedString(time))), data_type: ast::DataType::Time(None, TimezoneInfo::None), format: None, }) @@ -1057,102 +1068,102 @@ impl Unparser<'_> { /// For example ScalarValue::Date32(d) corresponds to the ast::Expr CAST('datestr' as DATE) fn scalar_to_sql(&self, v: &ScalarValue) -> Result { match v { - ScalarValue::Null => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Null => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Boolean(Some(b)) => { - Ok(ast::Expr::Value(ast::Value::Boolean(b.to_owned()))) + Ok(ast::Expr::value(ast::Value::Boolean(b.to_owned()))) } - ScalarValue::Boolean(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Boolean(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Float16(Some(f)) => { - Ok(ast::Expr::Value(ast::Value::Number(f.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(f.to_string(), false))) } - ScalarValue::Float16(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Float16(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Float32(Some(f)) => { let f_val = match f.fract() { 0.0 => format!("{:.1}", f), _ => format!("{}", f), }; - Ok(ast::Expr::Value(ast::Value::Number(f_val, false))) + Ok(ast::Expr::value(ast::Value::Number(f_val, false))) } - ScalarValue::Float32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Float32(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Float64(Some(f)) => { let f_val = match f.fract() { 0.0 => format!("{:.1}", f), _ => format!("{}", f), }; - Ok(ast::Expr::Value(ast::Value::Number(f_val, false))) + Ok(ast::Expr::value(ast::Value::Number(f_val, false))) } - ScalarValue::Float64(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Float64(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Decimal128(Some(value), precision, scale) => { - Ok(ast::Expr::Value(ast::Value::Number( + Ok(ast::Expr::value(ast::Value::Number( Decimal128Type::format_decimal(*value, *precision, *scale), false, ))) } - ScalarValue::Decimal128(None, ..) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Decimal128(None, ..) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Decimal256(Some(value), precision, scale) => { - Ok(ast::Expr::Value(ast::Value::Number( + Ok(ast::Expr::value(ast::Value::Number( Decimal256Type::format_decimal(*value, *precision, *scale), false, ))) } - ScalarValue::Decimal256(None, ..) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Decimal256(None, ..) 
=> Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Int8(Some(i)) => { - Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::Int8(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int8(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Int16(Some(i)) => { - Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::Int16(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int16(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Int32(Some(i)) => { - Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::Int32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int32(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Int64(Some(i)) => { - Ok(ast::Expr::Value(ast::Value::Number(i.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(i.to_string(), false))) } - ScalarValue::Int64(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Int64(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::UInt8(Some(ui)) => { - Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(ui.to_string(), false))) } - ScalarValue::UInt8(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt8(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::UInt16(Some(ui)) => { - Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(ui.to_string(), false))) } - ScalarValue::UInt16(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt16(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::UInt32(Some(ui)) => { - Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(ui.to_string(), false))) } - ScalarValue::UInt32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt32(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::UInt64(Some(ui)) => { - Ok(ast::Expr::Value(ast::Value::Number(ui.to_string(), false))) + Ok(ast::Expr::value(ast::Value::Number(ui.to_string(), false))) } - ScalarValue::UInt64(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::UInt64(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Utf8(Some(str)) => { - Ok(ast::Expr::Value(SingleQuotedString(str.to_string()))) + Ok(ast::Expr::value(SingleQuotedString(str.to_string()))) } - ScalarValue::Utf8(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Utf8(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Utf8View(Some(str)) => { - Ok(ast::Expr::Value(SingleQuotedString(str.to_string()))) + Ok(ast::Expr::value(SingleQuotedString(str.to_string()))) } - ScalarValue::Utf8View(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Utf8View(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::LargeUtf8(Some(str)) => { - Ok(ast::Expr::Value(SingleQuotedString(str.to_string()))) + Ok(ast::Expr::value(SingleQuotedString(str.to_string()))) } - ScalarValue::LargeUtf8(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::LargeUtf8(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Binary(Some(_)) => not_impl_err!("Unsupported scalar: {v:?}"), - ScalarValue::Binary(None) => 
Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Binary(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::BinaryView(Some(_)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::BinaryView(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::BinaryView(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::FixedSizeBinary(..) => { not_impl_err!("Unsupported scalar: {v:?}") } ScalarValue::LargeBinary(Some(_)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::LargeBinary(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::LargeBinary(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::FixedSizeList(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::List(a) => self.scalar_value_list_to_sql(a.values()), ScalarValue::LargeList(a) => self.scalar_value_list_to_sql(a.values()), @@ -1171,14 +1182,14 @@ impl Unparser<'_> { Ok(ast::Expr::Cast { kind: ast::CastKind::Cast, - expr: Box::new(ast::Expr::Value(SingleQuotedString( + expr: Box::new(ast::Expr::value(SingleQuotedString( date.to_string(), ))), data_type: ast::DataType::Date, format: None, }) } - ScalarValue::Date32(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Date32(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Date64(Some(_)) => { let datetime = v .to_array()? @@ -1194,57 +1205,57 @@ impl Unparser<'_> { Ok(ast::Expr::Cast { kind: ast::CastKind::Cast, - expr: Box::new(ast::Expr::Value(SingleQuotedString( + expr: Box::new(ast::Expr::value(SingleQuotedString( datetime.to_string(), ))), data_type: self.ast_type_for_date64_in_cast(), format: None, }) } - ScalarValue::Date64(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Date64(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Time32Second(Some(_t)) => { self.handle_time::(v) } - ScalarValue::Time32Second(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Time32Second(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::Time32Millisecond(Some(_t)) => { self.handle_time::(v) } ScalarValue::Time32Millisecond(None) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::Time64Microsecond(Some(_t)) => { self.handle_time::(v) } ScalarValue::Time64Microsecond(None) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::Time64Nanosecond(Some(_t)) => { self.handle_time::(v) } - ScalarValue::Time64Nanosecond(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::Time64Nanosecond(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::TimestampSecond(Some(_ts), tz) => { self.handle_timestamp::(v, tz) } ScalarValue::TimestampSecond(None, _) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::TimestampMillisecond(Some(_ts), tz) => { self.handle_timestamp::(v, tz) } ScalarValue::TimestampMillisecond(None, _) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::TimestampMicrosecond(Some(_ts), tz) => { self.handle_timestamp::(v, tz) } ScalarValue::TimestampMicrosecond(None, _) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::TimestampNanosecond(Some(_ts), tz) => { self.handle_timestamp::(v, tz) } ScalarValue::TimestampNanosecond(None, _) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::IntervalYearMonth(Some(_)) | 
ScalarValue::IntervalDayTime(Some(_)) @@ -1252,33 +1263,33 @@ impl Unparser<'_> { self.interval_scalar_to_sql(v) } ScalarValue::IntervalYearMonth(None) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } - ScalarValue::IntervalDayTime(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::IntervalDayTime(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::IntervalMonthDayNano(None) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::DurationSecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } - ScalarValue::DurationSecond(None) => Ok(ast::Expr::Value(ast::Value::Null)), + ScalarValue::DurationSecond(None) => Ok(ast::Expr::value(ast::Value::Null)), ScalarValue::DurationMillisecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } ScalarValue::DurationMillisecond(None) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::DurationMicrosecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } ScalarValue::DurationMicrosecond(None) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::DurationNanosecond(Some(_d)) => { not_impl_err!("Unsupported scalar: {v:?}") } ScalarValue::DurationNanosecond(None) => { - Ok(ast::Expr::Value(ast::Value::Null)) + Ok(ast::Expr::value(ast::Value::Null)) } ScalarValue::Struct(_) => not_impl_err!("Unsupported scalar: {v:?}"), ScalarValue::Map(_) => not_impl_err!("Unsupported scalar: {v:?}"), @@ -1302,7 +1313,7 @@ impl Unparser<'_> { // MONTH only if months != 0 && days == 0 && microseconds == 0 { let interval = Interval { - value: Box::new(ast::Expr::Value(ast::Value::Number( + value: Box::new(ast::Expr::value(ast::Value::Number( months.to_string(), false, ))), @@ -1319,7 +1330,7 @@ impl Unparser<'_> { // DAY only if microseconds == 0 { let interval = Interval { - value: Box::new(ast::Expr::Value(ast::Value::Number( + value: Box::new(ast::Expr::value(ast::Value::Number( days.to_string(), false, ))), @@ -1337,7 +1348,7 @@ impl Unparser<'_> { if microseconds % 1_000_000 != 0 { let interval = Interval { - value: Box::new(ast::Expr::Value(ast::Value::Number( + value: Box::new(ast::Expr::value(ast::Value::Number( microseconds.to_string(), false, ))), @@ -1353,7 +1364,7 @@ impl Unparser<'_> { if secs % 60 != 0 { let interval = Interval { - value: Box::new(ast::Expr::Value(ast::Value::Number( + value: Box::new(ast::Expr::value(ast::Value::Number( secs.to_string(), false, ))), @@ -1369,7 +1380,7 @@ impl Unparser<'_> { if mins % 60 != 0 { let interval = Interval { - value: Box::new(ast::Expr::Value(ast::Value::Number( + value: Box::new(ast::Expr::value(ast::Value::Number( mins.to_string(), false, ))), @@ -1385,7 +1396,7 @@ impl Unparser<'_> { if hours % 24 != 0 { let interval = Interval { - value: Box::new(ast::Expr::Value(ast::Value::Number( + value: Box::new(ast::Expr::value(ast::Value::Number( hours.to_string(), false, ))), @@ -1400,7 +1411,7 @@ impl Unparser<'_> { let days = hours / 24; let interval = Interval { - value: Box::new(ast::Expr::Value(ast::Value::Number( + value: Box::new(ast::Expr::value(ast::Value::Number( days.to_string(), false, ))), @@ -1422,7 +1433,7 @@ impl Unparser<'_> { ); }; let interval = Interval { - value: Box::new(ast::Expr::Value(SingleQuotedString( + value: Box::new(ast::Expr::value(SingleQuotedString( result.to_uppercase(), ))), leading_field: None, @@ -1436,7 +1447,7 @@ impl Unparser<'_> { 
IntervalStyle::SQLStandard => match v { ScalarValue::IntervalYearMonth(Some(v)) => { let interval = Interval { - value: Box::new(ast::Expr::Value(SingleQuotedString( + value: Box::new(ast::Expr::value(SingleQuotedString( v.to_string(), ))), leading_field: Some(ast::DateTimeField::Month), @@ -1457,7 +1468,7 @@ impl Unparser<'_> { let millis = v.milliseconds % 1_000; let interval = Interval { - value: Box::new(ast::Expr::Value(SingleQuotedString(format!( + value: Box::new(ast::Expr::value(SingleQuotedString(format!( "{days} {hours}:{mins}:{secs}.{millis:3}" )))), leading_field: Some(ast::DateTimeField::Day), @@ -1470,7 +1481,7 @@ impl Unparser<'_> { ScalarValue::IntervalMonthDayNano(Some(v)) => { if v.months >= 0 && v.days == 0 && v.nanoseconds == 0 { let interval = Interval { - value: Box::new(ast::Expr::Value(SingleQuotedString( + value: Box::new(ast::Expr::value(SingleQuotedString( v.months.to_string(), ))), leading_field: Some(ast::DateTimeField::Month), @@ -1491,7 +1502,7 @@ impl Unparser<'_> { let millis = (v.nanoseconds % 1_000_000_000) / 1_000_000; let interval = Interval { - value: Box::new(ast::Expr::Value(SingleQuotedString( + value: Box::new(ast::Expr::value(SingleQuotedString( format!("{days} {hours}:{mins}:{secs}.{millis:03}"), ))), leading_field: Some(ast::DateTimeField::Day), @@ -1536,7 +1547,7 @@ impl Unparser<'_> { let args = self.function_args_to_sql(std::slice::from_ref(&unnest.expr))?; Ok(ast::Expr::Function(Function { - name: ObjectName(vec![Ident { + name: ObjectName::from(vec![Ident { value: "UNNEST".to_string(), quote_style: None, span: Span::empty(), @@ -1565,10 +1576,10 @@ impl Unparser<'_> { DataType::Int16 => Ok(ast::DataType::SmallInt(None)), DataType::Int32 => Ok(self.dialect.int32_cast_dtype()), DataType::Int64 => Ok(self.dialect.int64_cast_dtype()), - DataType::UInt8 => Ok(ast::DataType::UnsignedTinyInt(None)), - DataType::UInt16 => Ok(ast::DataType::UnsignedSmallInt(None)), - DataType::UInt32 => Ok(ast::DataType::UnsignedInteger(None)), - DataType::UInt64 => Ok(ast::DataType::UnsignedBigInt(None)), + DataType::UInt8 => Ok(ast::DataType::TinyIntUnsigned(None)), + DataType::UInt16 => Ok(ast::DataType::SmallIntUnsigned(None)), + DataType::UInt32 => Ok(ast::DataType::IntegerUnsigned(None)), + DataType::UInt64 => Ok(ast::DataType::BigIntUnsigned(None)), DataType::Float16 => { not_impl_err!("Unsupported DataType: conversion: {data_type:?}") } @@ -2574,7 +2585,7 @@ mod tests { let default_dialect = CustomDialectBuilder::new().build(); let mysql_dialect = CustomDialectBuilder::new() .with_int64_cast_dtype(ast::DataType::Custom( - ObjectName(vec![Ident::new("SIGNED")]), + ObjectName::from(vec![Ident::new("SIGNED")]), vec![], )) .build(); @@ -2602,7 +2613,7 @@ mod tests { let default_dialect = CustomDialectBuilder::new().build(); let mysql_dialect = CustomDialectBuilder::new() .with_int32_cast_dtype(ast::DataType::Custom( - ObjectName(vec![Ident::new("SIGNED")]), + ObjectName::from(vec![Ident::new("SIGNED")]), vec![], )) .build(); diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index 507a6b2761aa..eb99d1e27031 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -49,7 +49,7 @@ use datafusion_expr::{ LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, UserDefinedLogicalNode, }; -use sqlparser::ast::{self, Ident, SetExpr, TableAliasColumnDef}; +use sqlparser::ast::{self, Ident, OrderByKind, SetExpr, TableAliasColumnDef}; use std::sync::Arc; /// Convert a DataFusion 
[`LogicalPlan`] to [`ast::Statement`] @@ -357,7 +357,7 @@ impl Unparser<'_> { table_parts.push( self.new_ident_quoted_if_needs(scan.table_name.table().to_string()), ); - builder.name(ast::ObjectName(table_parts)); + builder.name(ast::ObjectName::from(table_parts)); relation.table(builder); Ok(()) @@ -472,7 +472,7 @@ impl Unparser<'_> { }; if let Some(fetch) = sort.fetch { - query_ref.limit(Some(ast::Expr::Value(ast::Value::Number( + query_ref.limit(Some(ast::Expr::value(ast::Value::Number( fetch.to_string(), false, )))); @@ -668,7 +668,7 @@ impl Unparser<'_> { )); } exists_select.projection(vec![ast::SelectItem::UnnamedExpr( - ast::Expr::Value(ast::Value::Number("1".to_string(), false)), + ast::Expr::value(ast::Value::Number("1".to_string(), false)), )]); query_builder.body(Box::new(SetExpr::Select(Box::new( exists_select.build()?, @@ -1113,11 +1113,13 @@ impl Unparser<'_> { } } - fn sorts_to_sql(&self, sort_exprs: &[SortExpr]) -> Result> { - sort_exprs - .iter() - .map(|sort_expr| self.sort_to_sql(sort_expr)) - .collect::>>() + fn sorts_to_sql(&self, sort_exprs: &[SortExpr]) -> Result { + Ok(OrderByKind::Expressions( + sort_exprs + .iter() + .map(|sort_expr| self.sort_to_sql(sort_expr)) + .collect::>>()?, + )) } fn join_operator_to_sql( @@ -1173,7 +1175,7 @@ impl Unparser<'_> { // this is represented as two columns like `[t1.id, t2.id]` // This code forms `id` (without relation name) let ident = self.new_ident_quoted_if_needs(left_name.to_string()); - object_names.push(ast::ObjectName(vec![ident])); + object_names.push(ast::ObjectName::from(vec![ident])); } // USING is only valid with matching column names; arbitrary expressions // are not allowed diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index f21fb2fcb49f..75038ccc4314 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -448,7 +448,7 @@ pub(crate) fn date_part_to_sql( }; return Ok(Some(ast::Expr::Function(ast::Function { - name: ast::ObjectName(vec![ast::Ident { + name: ast::ObjectName::from(vec![ast::Ident { value: "strftime".to_string(), quote_style: None, span: Span::empty(), @@ -457,7 +457,7 @@ pub(crate) fn date_part_to_sql( duplicate_treatment: None, args: vec![ ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr( - ast::Expr::Value(ast::Value::SingleQuotedString( + ast::Expr::value(ast::Value::SingleQuotedString( field.to_string(), )), )), diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index c3a28f050f5b..0d0ab8808de3 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -372,7 +372,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT j1_string from j1 join j2 on j1.j1_id = j2.j2_id order by j1_id", - expected: r#"SELECT j1.j1_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id ASC NULLS LAST"#, + expected: r#"SELECT j1.j1_string FROM j1 INNER JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id ASC NULLS LAST"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, @@ -397,7 +397,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { ) abc ORDER BY abc.j2_string", - expected: r#"SELECT abc.j1_string, abc.j2_string FROM (SELECT DISTINCT j1.j1_id, j1.j1_string, j2.j2_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, + expected: r#"SELECT 
abc.j1_string, abc.j2_string FROM (SELECT DISTINCT j1.j1_id, j1.j1_string, j2.j2_string FROM j1 INNER JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, @@ -414,7 +414,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { j1_id ) AS agg (id, string_count) ", - expected: r#"SELECT agg.string_count FROM (SELECT j1.j1_id, min(j2.j2_string) FROM j1 LEFT JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id) AS agg (id, string_count)"#, + expected: r#"SELECT agg.string_count FROM (SELECT j1.j1_id, min(j2.j2_string) FROM j1 LEFT OUTER JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id) AS agg (id, string_count)"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, @@ -443,7 +443,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { ) abc ORDER BY abc.j2_string", - expected: r#"SELECT abc.j1_string, abc.j2_string FROM (SELECT j1.j1_id, j1.j1_string, j2.j2_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id, j1.j1_string, j2.j2_string ORDER BY j1.j1_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, + expected: r#"SELECT abc.j1_string, abc.j2_string FROM (SELECT j1.j1_id, j1.j1_string, j2.j2_string FROM j1 INNER JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id, j1.j1_string, j2.j2_string ORDER BY j1.j1_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, @@ -468,7 +468,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { ) abc ORDER BY j2_string", - expected: r#"SELECT abc.j1_string FROM (SELECT j1.j1_string, j2.j2_string FROM j1 JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, + expected: r#"SELECT abc.j1_string FROM (SELECT j1.j1_string, j2.j2_string FROM j1 INNER JOIN j2 ON (j1.j1_id = j2.j2_id) ORDER BY j1.j1_id DESC NULLS FIRST, j2.j2_id DESC NULLS FIRST LIMIT 10) AS abc ORDER BY abc.j2_string ASC NULLS LAST"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, @@ -560,7 +560,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", - expected: r#"SELECT u.c1, j1.j1_id, j1.j1_string FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, + expected: r#"SELECT u.c1, j1.j1_id, j1.j1_string FROM (SELECT UNNEST([1, 2, 3]) AS "UNNEST(make_array(Int64(1),Int64(2),Int64(3)))") AS u (c1) INNER JOIN j1 ON (u.c1 = j1.j1_id)"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: Box::new(UnparserDefaultDialect {}), }, @@ -596,7 +596,7 @@ fn roundtrip_statement_with_dialect() -> Result<()> { }, TestStatementWithDialect { sql: "SELECT * FROM UNNEST([1,2,3]) u(c1) JOIN j1 ON u.c1 = j1.j1_id", - expected: r#"SELECT u.c1, j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) AS u (c1) JOIN j1 ON (u.c1 = j1.j1_id)"#, + expected: r#"SELECT u.c1, j1.j1_id, j1.j1_string FROM UNNEST([1, 2, 3]) AS u (c1) INNER JOIN j1 ON (u.c1 = j1.j1_id)"#, parser_dialect: Box::new(GenericDialect {}), unparser_dialect: 
Box::new(CustomDialectBuilder::default().with_unnest_as_table_factor(true).build()), }, @@ -1279,7 +1279,7 @@ fn test_join_with_table_scan_filters() -> Result<()> { let sql = plan_to_sql(&join_plan_with_filter)?; - let expected_sql = r#"SELECT * FROM left_table AS "left" JOIN right_table ON "left".id = right_table.id AND (("left".id > 5) AND ("left"."name" LIKE 'some_name' AND (age > 10)))"#; + let expected_sql = r#"SELECT * FROM left_table AS "left" INNER JOIN right_table ON "left".id = right_table.id AND (("left".id > 5) AND ("left"."name" LIKE 'some_name' AND (age > 10)))"#; assert_eq!(sql.to_string(), expected_sql); @@ -1294,7 +1294,7 @@ fn test_join_with_table_scan_filters() -> Result<()> { let sql = plan_to_sql(&join_plan_no_filter)?; - let expected_sql = r#"SELECT * FROM left_table AS "left" JOIN right_table ON "left".id = right_table.id AND ("left"."name" LIKE 'some_name' AND (age > 10))"#; + let expected_sql = r#"SELECT * FROM left_table AS "left" INNER JOIN right_table ON "left".id = right_table.id AND ("left"."name" LIKE 'some_name' AND (age > 10))"#; assert_eq!(sql.to_string(), expected_sql); @@ -1319,7 +1319,7 @@ fn test_join_with_table_scan_filters() -> Result<()> { let sql = plan_to_sql(&join_plan_multiple_filters)?; - let expected_sql = r#"SELECT * FROM left_table AS "left" JOIN right_table ON "left".id = right_table.id AND (("left".id > 5) AND (("left"."name" LIKE 'some_name' AND (right_table."name" = 'before_join_filter_val')) AND (age > 10))) WHERE ("left"."name" = 'after_join_filter_val')"#; + let expected_sql = r#"SELECT * FROM left_table AS "left" INNER JOIN right_table ON "left".id = right_table.id AND (("left".id > 5) AND (("left"."name" LIKE 'some_name' AND (right_table."name" = 'before_join_filter_val')) AND (age > 10))) WHERE ("left"."name" = 'after_join_filter_val')"#; assert_eq!(sql.to_string(), expected_sql); @@ -1349,7 +1349,7 @@ fn test_join_with_table_scan_filters() -> Result<()> { let sql = plan_to_sql(&join_plan_duplicated_filter)?; - let expected_sql = r#"SELECT * FROM left_table AS "left" JOIN right_table ON "left".id = right_table.id AND (("left".id > 5) AND (("left"."name" LIKE 'some_name' AND (right_table.age > 10)) AND (right_table.age < 11)))"#; + let expected_sql = r#"SELECT * FROM left_table AS "left" INNER JOIN right_table ON "left".id = right_table.id AND (("left".id > 5) AND (("left"."name" LIKE 'some_name' AND (right_table.age > 10)) AND (right_table.age < 11)))"#; assert_eq!(sql.to_string(), expected_sql); diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 51b3e37d737a..bdba73876103 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -286,7 +286,7 @@ drop table struct_values; statement ok CREATE OR REPLACE VIEW complex_view AS SELECT { - 'user': { + 'user_information': { 'info': { 'personal': { 'name': 'John Doe', @@ -347,22 +347,22 @@ SELECT { } AS complex_data; query T -SELECT complex_data.user.info.personal.name FROM complex_view; +SELECT complex_data.user_information.info.personal.name FROM complex_view; ---- John Doe query I -SELECT complex_data.user.info.personal.age FROM complex_view; +SELECT complex_data.user_information.info.personal.age FROM complex_view; ---- 30 query T -SELECT complex_data.user.info.address.city FROM complex_view; +SELECT complex_data.user_information.info.address.city FROM complex_view; ---- Anytown query T -SELECT complex_data.user.preferences.languages[2] FROM complex_view; +SELECT 
complex_data.user_information.preferences.languages[2] FROM complex_view; ---- es diff --git a/datafusion/sqllogictest/test_files/update.slt b/datafusion/sqllogictest/test_files/update.slt index 0f9582b04c58..908d2b34aea4 100644 --- a/datafusion/sqllogictest/test_files/update.slt +++ b/datafusion/sqllogictest/test_files/update.slt @@ -78,8 +78,8 @@ physical_plan_error This feature is not implemented: Unsupported logical plan: D statement ok create table t3(a int, b varchar, c double, d int); -# set from multiple tables, sqlparser only supports from one table -query error DataFusion error: SQL error: ParserError\("Expected end of statement, found: ,"\) +# set from multiple tables, DataFusion only supports from one table +query error DataFusion error: Error during planning: Multiple tables in UPDATE SET FROM not yet supported explain update t1 set b = t2.b, c = t3.a, d = 1 from t2, t3 where t1.a = t2.a and t1.a = t3.a; # test table alias
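
Note (not part of the patch): most of the unparser churn above is mechanical fallout from sqlparser 0.55 wrapping literal values in `ValueWithSpan`. A minimal sketch of the two sides of that change, assuming only the 0.55 API surface already exercised in this diff (`ast::Expr::value`, `ValueWithSpan`, `ast::Value::Number`):

use sqlparser::ast::{self, ValueWithSpan};

// Constructing: Expr::value(..) attaches an empty span; it is equivalent to
// ast::Expr::Value(ast::Value::Number(..).into()) as used in the tests above.
fn two() -> ast::Expr {
    ast::Expr::value(ast::Value::Number("2".to_string(), false))
}

// Matching: Expr::Value now carries a ValueWithSpan, so patterns destructure it
// and ignore the span.
fn as_number(e: &ast::Expr) -> Option<&str> {
    match e {
        ast::Expr::Value(ValueWithSpan {
            value: ast::Value::Number(n, false),
            span: _,
        }) => Some(n.as_str()),
        _ => None,
    }
}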
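
A similarly hedged sketch for the remaining renames visible above: `ObjectName` is no longer constructed as a tuple struct (0.55 changed its inner representation, so construction goes through `From<Vec<Ident>>` and spans come from the `Spanned` trait), ORDER BY expression lists are wrapped in `OrderByKind`, and the unsigned integer SQL types follow a new naming scheme. Only names already used in this diff are assumed:

use sqlparser::ast::{self, Ident, ObjectName, OrderByKind};

// Tuple-struct construction (ObjectName(vec![...])) no longer compiles; use From.
fn unnest_name() -> ObjectName {
    ObjectName::from(vec![Ident::new("UNNEST")])
}

// ORDER BY expressions are now one variant of OrderByKind.
fn order_by(exprs: Vec<ast::OrderByExpr>) -> OrderByKind {
    OrderByKind::Expressions(exprs)
}

// Unsigned integer types were renamed, e.g. UnsignedInteger -> IntegerUnsigned.
fn uint32_sql_type() -> ast::DataType {
    ast::DataType::IntegerUnsigned(None)
}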