From 626e2dd01c6221ee48cbbc046335c8e427e8fe7b Mon Sep 17 00:00:00 2001 From: Subhan Date: Tue, 19 Nov 2024 19:19:18 +0000 Subject: [PATCH 1/8] unparse struct to sql --- datafusion/sql/src/unparser/expr.rs | 51 ++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index f1f28258f9bd..ad087c9af2a1 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -462,7 +462,9 @@ impl Unparser<'_> { match func_name { "make_array" => self.make_array_to_sql(args), "array_element" => self.array_element_to_sql(args), - // TODO: support for the construct and access functions of the `map` and `struct` types + "named_struct" => self.named_struct_to_sql(args), + "get_field" => self.get_field_to_sql(args), + // TODO: support for the construct and access functions of the `map` type _ => self.scalar_function_to_sql_internal(func_name, args), } } @@ -514,6 +516,47 @@ impl Unparser<'_> { }) } + fn named_struct_to_sql(&self, args: &[Expr]) -> Result { + if args.len() % 2 != 0 { + return internal_err!("named_struct must have an even number of arguments"); + } + + let args = args + .chunks_exact(2) + .map(|exprs| { + Ok(ast::DictionaryField { + key: self.new_ident_quoted_if_needs(exprs[0].to_string()), + value: Box::new(self.expr_to_sql(&exprs[1])?), + }) + }) + .collect::>>()?; + + Ok(ast::Expr::Dictionary(args)) + } + + fn get_field_to_sql(&self, args: &[Expr]) -> Result { + if args.len() != 2 { + return internal_err!("get_field must have exactly 2 arguments"); + } + + let mut id = match &args[0] { + Expr::Column(col) => match self.col_to_sql(col)? { + ast::Expr::Identifier(ident) => vec![ident], + ast::Expr::CompoundIdentifier(idents) => idents, + _ => unreachable!(), + }, + _ => return internal_err!("get_field column is invalid"), + }; + + let field = match &args[1] { + Expr::Literal(lit) => self.new_ident_quoted_if_needs(lit.to_string()), + _ => return internal_err!("get_field field_name is invalid"), + }; + id.push(field); + + Ok(ast::Expr::CompoundIdentifier(id)) + } + pub fn sort_to_sql(&self, sort: &Sort) -> Result { let Sort { expr, @@ -1524,6 +1567,7 @@ mod tests { Signature, Volatility, WindowFrame, WindowFunctionDefinition, }; use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt}; + use datafusion_functions::expr_fn::{get_field, named_struct}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; use datafusion_functions_nested::expr_fn::{array_element, make_array}; @@ -1937,6 +1981,11 @@ mod tests { array_element(make_array(vec![lit(1), lit(2), lit(3)]), lit(1)), "[1, 2, 3][1]", ), + ( + named_struct(vec![col("a"), lit("1"), col("b"), lit(2)]), + "{a: '1', b: 2}", + ), + (get_field(col("a.b"), "c"), "a.b.c"), ]; for (expr, expected) in tests { From 378679e5c74b4e17732e24a96c31aff13d07091e Mon Sep 17 00:00:00 2001 From: Subhan Date: Tue, 19 Nov 2024 21:06:49 +0000 Subject: [PATCH 2/8] add roundtrip statement test for named_struct --- datafusion/sql/src/unparser/expr.rs | 13 +++++++++---- datafusion/sql/tests/cases/plan_to_sql.rs | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index ad087c9af2a1..3f877f8e3834 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -523,10 +523,15 @@ impl Unparser<'_> { let args = args .chunks_exact(2) - .map(|exprs| { + .map(|chunk| { + let key = match &chunk[0] { + Expr::Literal(lit) => lit.to_string(), + _ => return internal_err!("named_struct arg is invalid"), + }; + Ok(ast::DictionaryField { - key: self.new_ident_quoted_if_needs(exprs[0].to_string()), - value: Box::new(self.expr_to_sql(&exprs[1])?), + key: Ident::new(key), + value: Box::new(self.expr_to_sql(&chunk[1])?), }) }) .collect::>>()?; @@ -1982,7 +1987,7 @@ mod tests { "[1, 2, 3][1]", ), ( - named_struct(vec![col("a"), lit("1"), col("b"), lit(2)]), + named_struct(vec![lit("a"), lit("1"), lit("b"), lit(2)]), "{a: '1', b: 2}", ), (get_field(col("a.b"), "c"), "a.b.c"), diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index f9d97cdc74af..caafd19e3dbc 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -24,6 +24,7 @@ use datafusion_expr::test::function_stub::{ count_udaf, max_udaf, min_udaf, sum, sum_udaf, }; use datafusion_expr::{col, lit, table_scan, wildcard, LogicalPlanBuilder}; +use datafusion_functions::core::named_struct; use datafusion_functions::unicode; use datafusion_functions_aggregate::grouping::grouping_udaf; use datafusion_functions_nested::make_array::make_array_udf; @@ -188,7 +189,8 @@ fn roundtrip_statement() -> Result<()> { "SELECT ARRAY[1, 2, 3][1]", "SELECT [1, 2, 3]", "SELECT [1, 2, 3][1]", - "SELECT left[1] FROM array" + "SELECT left[1] FROM array", + "SELECT {a:1, b:2}" ]; // For each test sql string, we transform as follows: From 4e83eb36050dd88b0d8450e589734a024d0955ce Mon Sep 17 00:00:00 2001 From: Subhan Date: Tue, 19 Nov 2024 21:27:29 +0000 Subject: [PATCH 3/8] quote keys if needed --- datafusion/sql/src/unparser/expr.rs | 4 ++-- datafusion/sql/tests/cases/plan_to_sql.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 3f877f8e3834..bc2111d11368 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -525,12 +525,12 @@ impl Unparser<'_> { .chunks_exact(2) .map(|chunk| { let key = match &chunk[0] { - Expr::Literal(lit) => lit.to_string(), + Expr::Literal(lit) => self.new_ident_quoted_if_needs(lit.to_string()), _ => return internal_err!("named_struct arg is invalid"), }; Ok(ast::DictionaryField { - key: Ident::new(key), + key, value: Box::new(self.expr_to_sql(&chunk[1])?), }) }) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index caafd19e3dbc..51be6d3ad126 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -24,7 +24,6 @@ use datafusion_expr::test::function_stub::{ count_udaf, max_udaf, min_udaf, sum, sum_udaf, }; use datafusion_expr::{col, lit, table_scan, wildcard, LogicalPlanBuilder}; -use datafusion_functions::core::named_struct; use datafusion_functions::unicode; use datafusion_functions_aggregate::grouping::grouping_udaf; use datafusion_functions_nested::make_array::make_array_udf; From bd4d71de77b2fafc64014d2ea5fc807390b50f45 Mon Sep 17 00:00:00 2001 From: Subhan Date: Wed, 20 Nov 2024 08:42:42 +0000 Subject: [PATCH 4/8] add roundtrip statement test for get_field --- datafusion/sql/tests/cases/plan_to_sql.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 51be6d3ad126..58d99549de31 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -189,7 +189,8 @@ fn roundtrip_statement() -> Result<()> { "SELECT [1, 2, 3]", "SELECT [1, 2, 3][1]", "SELECT left[1] FROM array", - "SELECT {a:1, b:2}" + "SELECT {a:1, b:2}", + "SELECT s.a FROM (SELECT {a:1, b:2} AS s)" ]; // For each test sql string, we transform as follows: From 622288507dedacc22b9d7a11df492c3bb344a3a5 Mon Sep 17 00:00:00 2001 From: Subhan Date: Wed, 20 Nov 2024 21:07:24 +0000 Subject: [PATCH 5/8] improve error messages --- datafusion/sql/src/unparser/expr.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index bc2111d11368..24d23f5eff24 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -526,7 +526,7 @@ impl Unparser<'_> { .map(|chunk| { let key = match &chunk[0] { Expr::Literal(lit) => self.new_ident_quoted_if_needs(lit.to_string()), - _ => return internal_err!("named_struct arg is invalid"), + _ => return internal_err!("named_struct expects even arguments to be strings, but received: {:?}", &chunk[0]) }; Ok(ast::DictionaryField { @@ -548,14 +548,14 @@ impl Unparser<'_> { Expr::Column(col) => match self.col_to_sql(col)? { ast::Expr::Identifier(ident) => vec![ident], ast::Expr::CompoundIdentifier(idents) => idents, - _ => unreachable!(), + other => return internal_err!("expected col_to_sql to return an Identifier or CompoundIdentifier, but received: {:?}", other), }, - _ => return internal_err!("get_field column is invalid"), + _ => return internal_err!("get_field expects first argument to be column, but received: {:?}", &args[0]), }; let field = match &args[1] { Expr::Literal(lit) => self.new_ident_quoted_if_needs(lit.to_string()), - _ => return internal_err!("get_field field_name is invalid"), + _ => return internal_err!("get_field expects second argument to be a string, but received: {:?}", &args[0]), }; id.push(field); From ef4209a37f6d369933820aabccfda62d9120b771 Mon Sep 17 00:00:00 2001 From: Subhan Date: Wed, 20 Nov 2024 21:08:56 +0000 Subject: [PATCH 6/8] fmt --- datafusion/sql/src/unparser/expr.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 24d23f5eff24..20df1de5d037 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -555,7 +555,10 @@ impl Unparser<'_> { let field = match &args[1] { Expr::Literal(lit) => self.new_ident_quoted_if_needs(lit.to_string()), - _ => return internal_err!("get_field expects second argument to be a string, but received: {:?}", &args[0]), + _ => return internal_err!( + "get_field expects second argument to be a string, but received: {:?}", + &args[0] + ), }; id.push(field); From 9c3ba00d8c4fa118c6f8a5d6ebe01b59539662dd Mon Sep 17 00:00:00 2001 From: Subhan Date: Wed, 20 Nov 2024 21:11:25 +0000 Subject: [PATCH 7/8] fmt --- datafusion/sql/src/unparser/expr.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 20df1de5d037..ec8478bbaa04 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -555,10 +555,12 @@ impl Unparser<'_> { let field = match &args[1] { Expr::Literal(lit) => self.new_ident_quoted_if_needs(lit.to_string()), - _ => return internal_err!( + _ => { + return internal_err!( "get_field expects second argument to be a string, but received: {:?}", &args[0] - ), + ) + } }; id.push(field); From a8dd310dba4a046b75d06f52fbba6cec09a25b3d Mon Sep 17 00:00:00 2001 From: delamarch3 <68732277+delamarch3@users.noreply.github.com> Date: Thu, 21 Nov 2024 11:24:27 +0000 Subject: [PATCH 8/8] match string literals only Co-authored-by: Jax Liu --- datafusion/sql/src/unparser/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index ec8478bbaa04..ae2607de00a2 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -525,7 +525,7 @@ impl Unparser<'_> { .chunks_exact(2) .map(|chunk| { let key = match &chunk[0] { - Expr::Literal(lit) => self.new_ident_quoted_if_needs(lit.to_string()), + Expr::Literal(ScalarValue::Utf8(Some(s))) => self.new_ident_quoted_if_needs(s.to_string()), _ => return internal_err!("named_struct expects even arguments to be strings, but received: {:?}", &chunk[0]) };