diff --git a/Cargo.toml b/Cargo.toml index 0c76ff196a10..55159a8940c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -177,3 +177,7 @@ large_futures = "warn" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_qualifications = "deny" + +## Temp patch for sqlparser +[patch.crates-io] +sqlparser = { git = "https://github.com/apache/datafusion-sqlparser-rs.git", rev = "d6743e924c99cebaa1f065eb721ae085f9f0116f" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 67880c55c2d8..a7b80dd800e0 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -881,9 +881,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68f1f47cdf0ec8ee4b941b2eee2a80cb796db73118c0dd09ac63fbe405be22" +checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8" dependencies = [ "memchr", "regex-automata", @@ -2210,7 +2210,7 @@ dependencies = [ "http 1.2.0", "hyper 1.5.1", "hyper-util", - "rustls 0.23.19", + "rustls 0.23.20", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", @@ -3112,7 +3112,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.19", + "rustls 0.23.20", "socket2", "thiserror 2.0.6", "tokio", @@ -3130,7 +3130,7 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls 0.23.19", + "rustls 0.23.20", "rustls-pki-types", "slab", "thiserror 2.0.6", @@ -3224,9 +3224,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags 2.6.0", ] @@ -3308,7 +3308,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.19", + "rustls 0.23.20", "rustls-native-certs 0.8.1", "rustls-pemfile 2.2.0", "rustls-pki-types", @@ -3427,9 +3427,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.19" +version = "0.23.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" dependencies = [ "once_cell", "ring", @@ -3773,8 +3773,7 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a875d8cd437cc8a97e9aeaeea352ec9a19aea99c23e9effb17757291de80b08" +source = "git+https://github.com/apache/datafusion-sqlparser-rs.git?rev=d6743e924c99cebaa1f065eb721ae085f9f0116f#d6743e924c99cebaa1f065eb721ae085f9f0116f" dependencies = [ "log", "sqlparser_derive", @@ -3782,9 +3781,8 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +version = "0.3.0" +source = "git+https://github.com/apache/datafusion-sqlparser-rs.git?rev=d6743e924c99cebaa1f065eb721ae085f9f0116f#d6743e924c99cebaa1f065eb721ae085f9f0116f" dependencies = [ "proc-macro2", "quote", @@ -4060,7 +4058,7 @@ version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ - "rustls 0.23.19", + "rustls 0.23.20", "tokio", ] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index be2bdbc11114..3ada19a727e5 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -67,3 +67,7 @@ assert_cmd = "2.0" ctor = "0.2.0" predicates = "3.0" rstest = "0.22" + +## Temp patch for sqlparser +[patch.crates-io] +sqlparser = { git = "https://github.com/apache/datafusion-sqlparser-rs.git", rev = "d6743e924c99cebaa1f065eb721ae085f9f0116f" } diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 5e840f859400..29d33fec14ab 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -887,10 +887,10 @@ pub fn get_available_parallelism() -> usize { #[cfg(test)] mod tests { + use super::*; use crate::ScalarValue::Null; use arrow::array::Float64Array; - - use super::*; + use sqlparser::tokenizer::Span; #[test] fn test_bisect_linear_left_and_right() -> Result<()> { @@ -1118,6 +1118,7 @@ mod tests { let expected_parsed = vec![Ident { value: identifier.to_string(), quote_style, + span: Span::empty(), }]; assert_eq!( diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 4d2a536c4920..30b3c6e2bbeb 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -27,10 +27,6 @@ use arrow_array::{ Array, ArrayRef, Float32Array, Float64Array, Int32Array, RecordBatch, StringArray, }; use arrow_schema::{DataType, Field, Schema}; -use parking_lot::Mutex; -use regex::Regex; -use sqlparser::ast::Ident; - use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionState}; use datafusion::prelude::*; use datafusion::{execution::registry::FunctionRegistry, test_util}; @@ -48,6 +44,10 @@ use datafusion_expr::{ Volatility, }; use datafusion_functions_nested::range::range_udf; +use parking_lot::Mutex; +use regex::Regex; +use sqlparser::ast::Ident; +use sqlparser::tokenizer::Span; /// test that casting happens on udfs. /// c11 is f32, but `custom_sqrt` requires f64. Casting happens but the logical plan and @@ -1187,6 +1187,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( name: Some(Ident { value: "name".into(), quote_style: None, + span: Span::empty(), }), data_type: DataType::Utf8, default_expr: None, @@ -1196,6 +1197,7 @@ async fn create_scalar_function_from_sql_statement_postgres_syntax() -> Result<( language: Some(Ident { value: "plrust".into(), quote_style: None, + span: Span::empty(), }), behavior: None, function_body: Some(lit(body)), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c495b5396f53..d0e98327f34b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -222,6 +222,8 @@ use sqlparser::ast::{ /// // to 42 = 5 AND b = 6 /// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6)))); #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] +// TODO make the enum smaller with more boxing (looks like Wildcard is now bigger) +#[allow(clippy::large_enum_variant)] pub enum Expr { /// An expression with a specific name. Alias(Alias), diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 67fa23b86990..15b415e3dc42 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -169,6 +169,11 @@ impl FunctionArgs { "Calling {name}: SEPARATOR not supported in function arguments: {sep}" ) } + FunctionArgumentClause::JsonNullClause(jn) => { + return not_impl_err!( + "Calling {name}: JSON NULL clause not supported in function arguments: {jn}" + ) + } } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index e8ec8d7b7d1c..9fc936c91619 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -22,14 +22,14 @@ use datafusion_expr::planner::{ }; use recursive::recursive; use sqlparser::ast::{ - BinaryOperator, CastFormat, CastKind, DataType as SQLDataType, DictionaryField, - Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry, StructField, Subscript, - TrimWhereField, Value, + AccessExpr, BinaryOperator, CastFormat, CastKind, DataType as SQLDataType, + DictionaryField, Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias, MapEntry, + StructField, Subscript, TrimWhereField, Value, }; use datafusion_common::{ - internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, - ScalarValue, + internal_datafusion_err, internal_err, not_impl_err, plan_err, Column, DFSchema, + Result, ScalarValue, }; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::expr::{InList, WildcardOptions}; @@ -236,15 +236,14 @@ impl SqlToRel<'_, S> { SQLExpr::Identifier(id) => { self.sql_identifier_to_expr(id, schema, planner_context) } - - SQLExpr::MapAccess { .. } => { - not_impl_err!("Map Access") - } - // ["foo"], [4] or [4:5] - SQLExpr::Subscript { expr, subscript } => { - self.sql_subscript_to_expr(*expr, subscript, schema, planner_context) - } + SQLExpr::CompoundFieldAccess { root, access_chain } => self + .sql_compound_field_access_to_expr( + *root, + access_chain, + schema, + planner_context, + ), SQLExpr::CompoundIdentifier(ids) => { self.sql_compound_identifier_to_expr(ids, schema, planner_context) @@ -593,11 +592,11 @@ impl SqlToRel<'_, S> { } not_impl_err!("AnyOp not supported by ExprPlanner: {binary_expr:?}") } - SQLExpr::Wildcard => Ok(Expr::Wildcard { + SQLExpr::Wildcard(_token) => Ok(Expr::Wildcard { qualifier: None, options: WildcardOptions::default(), }), - SQLExpr::QualifiedWildcard(object_name) => Ok(Expr::Wildcard { + SQLExpr::QualifiedWildcard(object_name, _token) => Ok(Expr::Wildcard { qualifier: Some(self.object_name_to_table_reference(object_name)?), options: WildcardOptions::default(), }), @@ -985,84 +984,141 @@ impl SqlToRel<'_, S> { Ok(Expr::Cast(Cast::new(Box::new(expr), dt))) } - fn sql_subscript_to_expr( + fn sql_compound_field_access_to_expr( &self, - expr: SQLExpr, - subscript: Box, + root: SQLExpr, + access_chain: Vec, schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?; - - let field_access = match *subscript { - Subscript::Index { index } => { - // index can be a name, in which case it is a named field access - match index { - SQLExpr::Value( - Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), - ) => GetFieldAccess::NamedStructField { - name: ScalarValue::from(s), - }, - SQLExpr::JsonAccess { .. } => { - return not_impl_err!("JsonAccess"); + let mut root = self.sql_expr_to_logical_expr(root, schema, planner_context)?; + let fields = access_chain + .into_iter() + .map(|field| match field { + AccessExpr::Subscript(subscript) => { + match subscript { + Subscript::Index { index } => { + // index can be a name, in which case it is a named field access + match index { + SQLExpr::Value( + Value::SingleQuotedString(s) + | Value::DoubleQuotedString(s), + ) => Ok(Some(GetFieldAccess::NamedStructField { + name: ScalarValue::from(s), + })), + SQLExpr::JsonAccess { .. } => { + not_impl_err!("JsonAccess") + } + // otherwise treat like a list index + _ => Ok(Some(GetFieldAccess::ListIndex { + key: Box::new(self.sql_expr_to_logical_expr( + index, + schema, + planner_context, + )?), + })), + } + } + Subscript::Slice { + lower_bound, + upper_bound, + stride, + } => { + // Means access like [:2] + let lower_bound = if let Some(lower_bound) = lower_bound { + self.sql_expr_to_logical_expr( + lower_bound, + schema, + planner_context, + ) + } else { + not_impl_err!("Slice subscript requires a lower bound") + }?; + + // means access like [2:] + let upper_bound = if let Some(upper_bound) = upper_bound { + self.sql_expr_to_logical_expr( + upper_bound, + schema, + planner_context, + ) + } else { + not_impl_err!("Slice subscript requires an upper bound") + }?; + + // stride, default to 1 + let stride = if let Some(stride) = stride { + self.sql_expr_to_logical_expr( + stride, + schema, + planner_context, + )? + } else { + lit(1i64) + }; + + Ok(Some(GetFieldAccess::ListRange { + start: Box::new(lower_bound), + stop: Box::new(upper_bound), + stride: Box::new(stride), + })) + } } - // otherwise treat like a list index - _ => GetFieldAccess::ListIndex { - key: Box::new(self.sql_expr_to_logical_expr( - index, - schema, - planner_context, - )?), - }, } - } - Subscript::Slice { - lower_bound, - upper_bound, - stride, - } => { - // Means access like [:2] - let lower_bound = if let Some(lower_bound) = lower_bound { - self.sql_expr_to_logical_expr(lower_bound, schema, planner_context) - } else { - not_impl_err!("Slice subscript requires a lower bound") - }?; - - // means access like [2:] - let upper_bound = if let Some(upper_bound) = upper_bound { - self.sql_expr_to_logical_expr(upper_bound, schema, planner_context) - } else { - not_impl_err!("Slice subscript requires an upper bound") - }?; - - // stride, default to 1 - let stride = if let Some(stride) = stride { - self.sql_expr_to_logical_expr(stride, schema, planner_context)? - } else { - lit(1i64) - }; - - GetFieldAccess::ListRange { - start: Box::new(lower_bound), - stop: Box::new(upper_bound), - stride: Box::new(stride), + AccessExpr::Dot(expr) => { + let expr = + self.sql_expr_to_logical_expr(expr, schema, planner_context)?; + match expr { + Expr::Column(Column { name, relation }) => { + if let Some(relation) = &relation { + // If the first part of the dot access is a column reference, we should + // check if the column is from the same table as the root expression. + // If it is, we should replace the root expression with the column reference. + // Otherwise, we should treat the dot access as a named field access. + if relation.table() == root.schema_name().to_string() { + root = Expr::Column(Column { + name, + relation: Some(relation.clone()), + }); + Ok(None) + } else { + plan_err!( + "table name mismatch: {} != {}", + relation.table(), + root.schema_name() + ) + } + } else { + Ok(Some(GetFieldAccess::NamedStructField { + name: ScalarValue::from(name), + })) + } + } + _ => not_impl_err!( + "Dot access not supported for non-column expr: {expr:?}" + ), + } } - } - }; + }) + .collect::>>()?; - let mut field_access_expr = RawFieldAccessExpr { expr, field_access }; - for planner in self.context_provider.get_expr_planners() { - match planner.plan_field_access(field_access_expr, schema)? { - PlannerResult::Planned(expr) => return Ok(expr), - PlannerResult::Original(expr) => { - field_access_expr = expr; + fields + .into_iter() + .flatten() + .try_fold(root, |expr, field_access| { + let mut field_access_expr = RawFieldAccessExpr { expr, field_access }; + for planner in self.context_provider.get_expr_planners() { + match planner.plan_field_access(field_access_expr, schema)? { + PlannerResult::Planned(expr) => return Ok(expr), + PlannerResult::Original(expr) => { + field_access_expr = expr; + } + } } - } - } - - not_impl_err!( - "GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}" - ) + not_impl_err!( + "GetFieldAccess not supported by ExprPlanner: {field_access_expr:?}" + ) + }) } } diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index efec6020641c..f185d65fa194 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -21,6 +21,7 @@ use std::collections::VecDeque; use std::fmt; use sqlparser::ast::ExprWithAlias; +use sqlparser::tokenizer::TokenWithSpan; use sqlparser::{ ast::{ ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query, @@ -28,7 +29,7 @@ use sqlparser::{ }, dialect::{keywords::Keyword, Dialect, GenericDialect}, parser::{Parser, ParserError}, - tokenizer::{Token, TokenWithLocation, Tokenizer, Word}, + tokenizer::{Token, Tokenizer, Word}, }; // Use `Parser::expected` instead, if possible @@ -338,7 +339,7 @@ impl<'a> DFParser<'a> { fn expected( &self, expected: &str, - found: TokenWithLocation, + found: TokenWithSpan, ) -> Result { parser_err!(format!("Expected {expected}, found: {found}")) } @@ -876,6 +877,7 @@ mod tests { use super::*; use sqlparser::ast::Expr::Identifier; use sqlparser::ast::{BinaryOperator, DataType, Expr, Ident}; + use sqlparser::tokenizer::Span; fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), ParserError> { let statements = DFParser::parse_sql(sql)?; @@ -911,6 +913,7 @@ mod tests { name: Ident { value: name.into(), quote_style: None, + span: Span::empty(), }, data_type, collation: None, @@ -1219,6 +1222,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc, nulls_first, @@ -1250,6 +1254,7 @@ mod tests { expr: Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(true), nulls_first: None, @@ -1259,6 +1264,7 @@ mod tests { expr: Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), }), asc: Some(false), nulls_first: Some(true), @@ -1290,11 +1296,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), @@ -1335,11 +1343,13 @@ mod tests { left: Box::new(Identifier(Ident { value: "c1".to_owned(), quote_style: None, + span: Span::empty(), })), op: BinaryOperator::Minus, right: Box::new(Identifier(Ident { value: "c2".to_owned(), quote_style: None, + span: Span::empty(), })), }, asc: Some(true), diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 59fa4ca5f1f6..73871f7fb37c 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -339,7 +339,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan: LogicalPlan, alias: TableAlias, ) -> Result { - let plan = self.apply_expr_alias(plan, alias.columns)?; + let idents = alias.columns.into_iter().map(|c| c.name).collect(); + let plan = self.apply_expr_alias(plan, idents)?; LogicalPlanBuilder::from(plan) .alias(TableReference::bare( @@ -542,7 +543,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Regclass | SQLDataType::Custom(_, _) | SQLDataType::Array(_) - | SQLDataType::Enum(_) + | SQLDataType::Enum(_, _) | SQLDataType::Set(_) | SQLDataType::MediumInt(_) | SQLDataType::UnsignedMediumInt(_) @@ -586,6 +587,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Nullable(_) | SQLDataType::LowCardinality(_) | SQLDataType::Trigger + // MySQL datatypes + | SQLDataType::TinyBlob + | SQLDataType::MediumBlob + | SQLDataType::LongBlob + | SQLDataType::TinyText + | SQLDataType::MediumText + | SQLDataType::LongText + | SQLDataType::Bit(_) + |SQLDataType::BitVarying(_) => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index c2a9bac24e66..9a84c00a8044 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -655,6 +655,7 @@ impl SqlToRel<'_, S> { opt_rename, opt_replace: _opt_replace, opt_ilike: _opt_ilike, + wildcard_token: _wildcard_token, } = options; if opt_rename.is_some() { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 38695f98b5fe..e153569e5b5f 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -54,13 +54,16 @@ use datafusion_expr::{ TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, Volatility, WriteOp, }; -use sqlparser::ast::{self, SqliteOnConflict}; +use sqlparser::ast::{ + self, BeginTransactionKind, NullsDistinctOption, ShowStatementIn, + ShowStatementOptions, SqliteOnConflict, +}; use sqlparser::ast::{ Assignment, AssignmentTarget, ColumnDef, CreateIndex, CreateTable, CreateTableOptions, Delete, DescribeAlias, Expr as SQLExpr, FromTable, Ident, Insert, ObjectName, ObjectType, OneOrManyWithParens, Query, SchemaName, SetExpr, - ShowCreateObject, ShowStatementFilter, Statement, TableConstraint, TableFactor, - TableWithJoins, TransactionMode, UnaryOperator, Value, + ShowCreateObject, Statement, TableConstraint, TableFactor, TableWithJoins, + TransactionMode, UnaryOperator, Value, }; use sqlparser::parser::ParserError::ParserError; @@ -107,6 +110,7 @@ fn calc_inline_constraints_from_columns(columns: &[ColumnDef]) -> Vec SqlToRel<'_, S> { Statement::ShowTables { extended, full, - db_name, - filter, - // SHOW TABLES IN/FROM are equivalent, this field specifies which the user - // specified, but it doesn't affect the plan so ignore the field - clause: _, - } => self.show_tables_to_plan(extended, full, db_name, filter), + terse, + history, + external, + show_options, + } => { + // We only support the basic "SHOW TABLES" + // https://github.com/apache/datafusion/issues/3188 + if extended { + return not_impl_err!("SHOW TABLES EXTENDED not supported")?; + } + if full { + return not_impl_err!("SHOW FULL TABLES not supported")?; + } + if terse { + return not_impl_err!("SHOW TERSE TABLES not supported")?; + } + if history { + return not_impl_err!("SHOW TABLES HISTORY not supported")?; + } + if external { + return not_impl_err!("SHOW EXTERNAL TABLES not supported")?; + } + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if show_in.is_some() { + return not_impl_err!("SHOW TABLES IN not supported")?; + } + if starts_with.is_some() { + return not_impl_err!("SHOW TABLES LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW TABLES LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW TABLES LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!("SHOW TABLES FILTER not supported")?; + } + self.show_tables_to_plan() + } Statement::ShowColumns { extended, full, - table_name, - filter, - } => self.show_columns_to_plan(extended, full, table_name, filter), + show_options, + } => { + let ShowStatementOptions { + show_in, + starts_with, + limit, + limit_from, + filter_position, + } = show_options; + if starts_with.is_some() { + return not_impl_err!("SHOW COLUMNS LIKE not supported")?; + } + if limit.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT not supported")?; + } + if limit_from.is_some() { + return not_impl_err!("SHOW COLUMNS LIMIT FROM not supported")?; + } + if filter_position.is_some() { + return not_impl_err!( + "SHOW COLUMNS with WHERE or LIKE is not supported" + )?; + } + let Some(ShowStatementIn { + // specifies if the syntax was `SHOW COLUMNS IN` or `SHOW + // COLUMNS FROM` which is not different in DataFusion + clause: _, + parent_type, + parent_name, + }) = show_in + else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + if let Some(parent_type) = parent_type { + return not_impl_err!("SHOW COLUMNS IN {parent_type} not supported"); + } + let Some(table_name) = parent_name else { + return plan_err!("SHOW COLUMNS requires a table name"); + }; + + self.show_columns_to_plan(extended, full, table_name) + } Statement::Insert(Insert { or, @@ -766,10 +850,14 @@ impl SqlToRel<'_, S> { from, selection, returning, + or, } => { if returning.is_some() { plan_err!("Update-returning clause not yet supported")?; } + if or.is_some() { + plan_err!("ON conflict not supported")?; + } self.update_to_plan(table, assignments, from, selection) } @@ -810,12 +898,14 @@ impl SqlToRel<'_, S> { modes, begin: false, modifier, + transaction, } => { if let Some(modifier) = modifier { return not_impl_err!( "Transaction modifier not supported: {modifier}" ); } + self.validate_transaction_kind(transaction)?; let isolation_level: ast::TransactionIsolationLevel = modes .iter() .filter_map(|m: &TransactionMode| match m { @@ -879,7 +969,7 @@ impl SqlToRel<'_, S> { }); Ok(LogicalPlan::Statement(statement)) } - Statement::CreateFunction { + Statement::CreateFunction(ast::CreateFunction { or_replace, temporary, name, @@ -889,7 +979,7 @@ impl SqlToRel<'_, S> { behavior, language, .. - } => { + }) => { let return_type = match return_type { Some(t) => Some(self.convert_data_type(&t)?), None => None, @@ -1065,24 +1155,12 @@ impl SqlToRel<'_, S> { } /// Generate a logical plan from a "SHOW TABLES" query - fn show_tables_to_plan( - &self, - extended: bool, - full: bool, - db_name: Option, - filter: Option, - ) -> Result { + fn show_tables_to_plan(&self) -> Result { if self.has_table("information_schema", "tables") { - // We only support the basic "SHOW TABLES" - // https://github.com/apache/datafusion/issues/3188 - if db_name.is_some() || filter.is_some() || full || extended { - plan_err!("Unsupported parameters to SHOW TABLES") - } else { - let query = "SELECT * FROM information_schema.tables;"; - let mut rewrite = DFParser::parse_sql(query)?; - assert_eq!(rewrite.len(), 1); - self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 - } + let query = "SELECT * FROM information_schema.tables;"; + let mut rewrite = DFParser::parse_sql(query)?; + assert_eq!(rewrite.len(), 1); + self.statement_to_plan(rewrite.pop_front().unwrap()) // length of rewrite is 1 } else { plan_err!("SHOW TABLES is not supported unless information_schema is enabled") } @@ -1842,22 +1920,18 @@ impl SqlToRel<'_, S> { extended: bool, full: bool, sql_table_name: ObjectName, - filter: Option, ) -> Result { - if filter.is_some() { - return plan_err!("SHOW COLUMNS with WHERE or LIKE is not supported"); - } + // Figure out the where clause + let where_clause = object_name_to_qualifier( + &sql_table_name, + self.options.enable_ident_normalization, + ); if !self.has_table("information_schema", "columns") { return plan_err!( "SHOW COLUMNS is not supported unless information_schema is enabled" ); } - // Figure out the where clause - let where_clause = object_name_to_qualifier( - &sql_table_name, - self.options.enable_ident_normalization, - ); // Do a table lookup to verify the table exists let table_ref = self.object_name_to_table_reference(sql_table_name)?; @@ -1917,4 +1991,19 @@ impl SqlToRel<'_, S> { .get_table_source(tables_reference) .is_ok() } + + fn validate_transaction_kind( + &self, + kind: Option, + ) -> Result<()> { + match kind { + // BEGIN + None => Ok(()), + // BEGIN TRANSACTION + Some(BeginTransactionKind::Transaction) => Ok(()), + Some(BeginTransactionKind::Work) => { + not_impl_err!("Transaction kind not supported: {kind:?}") + } + } + } } diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs index ad0b5f16b283..345d16adef29 100644 --- a/datafusion/sql/src/unparser/ast.rs +++ b/datafusion/sql/src/unparser/ast.rs @@ -24,6 +24,7 @@ use core::fmt; use sqlparser::ast; +use sqlparser::ast::helpers::attached_token::AttachedToken; #[derive(Clone)] pub(super) struct QueryBuilder { @@ -268,6 +269,7 @@ impl SelectBuilder { connect_by: None, window_before_qualify: false, prewhere: None, + select_token: AttachedToken::empty(), }) } fn create_empty() -> Self { @@ -469,6 +471,7 @@ impl TableRelationBuilder { version: self.version.clone(), partitions: self.partitions.clone(), with_ordinality: false, + json_path: None, }) } fn create_empty() -> Self { diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index ae387d441fa2..31b114bdd370 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -18,15 +18,15 @@ use std::sync::Arc; use arrow_schema::TimeUnit; +use datafusion_common::Result; use datafusion_expr::Expr; use regex::Regex; +use sqlparser::tokenizer::Span; use sqlparser::{ ast::{self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo}, keywords::ALL_KEYWORDS, }; -use datafusion_common::Result; - use super::{utils::character_length_to_sql, utils::date_part_to_sql, Unparser}; /// `Dialect` to use for Unparsing @@ -288,6 +288,7 @@ impl PostgreSqlDialect { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 48dfc425ee63..b5376f1dad06 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -18,8 +18,8 @@ use datafusion_expr::expr::Unnest; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ - self, Array, BinaryOperator, Expr as AstExpr, Function, Ident, Interval, ObjectName, - Subscript, TimezoneInfo, UnaryOperator, + self, AccessExpr, Array, BinaryOperator, Expr as AstExpr, Function, Ident, Interval, + ObjectName, Subscript, TimezoneInfo, UnaryOperator, }; use std::sync::Arc; use std::vec; @@ -43,6 +43,8 @@ use datafusion_expr::{ expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, GroupingSet, Like, Operator, TryCast, }; +use sqlparser::ast::helpers::attached_token::AttachedToken; +use sqlparser::tokenizer::Span; /// Convert a DataFusion [`Expr`] to [`ast::Expr`] /// @@ -233,6 +235,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -278,6 +281,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: agg @@ -404,12 +408,16 @@ impl Unparser<'_> { } // TODO: unparsing wildcard addition options Expr::Wildcard { qualifier, .. } => { + let attached_token = AttachedToken::empty(); if let Some(qualifier) = qualifier { let idents: Vec = qualifier.to_vec().into_iter().map(Ident::new).collect(); - Ok(ast::Expr::QualifiedWildcard(ObjectName(idents))) + Ok(ast::Expr::QualifiedWildcard( + ObjectName(idents), + attached_token, + )) } else { - Ok(ast::Expr::Wildcard) + Ok(ast::Expr::Wildcard(attached_token)) } } Expr::GroupingSet(grouping_set) => match grouping_set { @@ -480,6 +488,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: func_name.to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, @@ -511,9 +520,9 @@ impl Unparser<'_> { } let array = self.expr_to_sql(&args[0])?; let index = self.expr_to_sql(&args[1])?; - Ok(ast::Expr::Subscript { - expr: Box::new(array), - subscript: Box::new(Subscript::Index { index }), + Ok(ast::Expr::CompoundFieldAccess { + root: Box::new(array), + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index })], }) } @@ -709,6 +718,7 @@ impl Unparser<'_> { Ident { value: ident, quote_style, + span: Span::empty(), } } @@ -716,6 +726,7 @@ impl Unparser<'_> { Ident { value: str, quote_style: None, + span: Span::empty(), } } @@ -1481,6 +1492,7 @@ impl Unparser<'_> { name: ObjectName(vec![Ident { value: "UNNEST".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index e9f9f486ea9a..f2d46a9f4cce 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -44,7 +44,7 @@ use datafusion_expr::{ expr::Alias, BinaryExpr, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, SortExpr, TableScan, Unnest, }; -use sqlparser::ast::{self, Ident, SetExpr}; +use sqlparser::ast::{self, Ident, SetExpr, TableAliasColumnDef}; use std::sync::Arc; /// Convert a DataFusion [`LogicalPlan`] to [`ast::Statement`] @@ -1069,6 +1069,13 @@ impl Unparser<'_> { } fn new_table_alias(&self, alias: String, columns: Vec) -> ast::TableAlias { + let columns = columns + .into_iter() + .map(|ident| TableAliasColumnDef { + name: ident, + data_type: None, + }) + .collect(); ast::TableAlias { name: self.new_ident_quoted_if_needs(alias), columns, diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 354a68f60964..4ce6dc133bc5 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -17,6 +17,10 @@ use std::{cmp::Ordering, sync::Arc, vec}; +use super::{ + dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, + rewrite::TableAliasRewriter, Unparser, +}; use datafusion_common::{ internal_err, tree_node::{Transformed, TransformedResult, TreeNode}, @@ -29,11 +33,7 @@ use datafusion_expr::{ use indexmap::IndexSet; use sqlparser::ast; - -use super::{ - dialect::CharacterLengthStyle, dialect::DateFieldExtractStyle, - rewrite::TableAliasRewriter, Unparser, -}; +use sqlparser::tokenizer::Span; /// Recursively searches children of [LogicalPlan] to find an Aggregate node if exists /// prior to encountering a Join, TableScan, or a nested subquery (derived table factor). @@ -426,6 +426,7 @@ pub(crate) fn date_part_to_sql( name: ast::ObjectName(vec![ast::Ident { value: "strftime".to_string(), quote_style: None, + span: Span::empty(), }]), args: ast::FunctionArguments::List(ast::FunctionArgumentList { duplicate_treatment: None, diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 4d51a61c8a52..95098038ae2f 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -483,10 +483,10 @@ set datafusion.catalog.information_schema = true; statement ok CREATE TABLE t AS SELECT 1::int as i; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t LIKE 'f'; -statement error Error during planning: SHOW COLUMNS with WHERE or LIKE is not supported +statement error DataFusion error: This feature is not implemented: SHOW COLUMNS with WHERE or LIKE is not supported SHOW COLUMNS FROM t WHERE column_name = 'bar'; query TTTTTT