From 192cab4ef83e731d222fe2e6056d8888597327cf Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 20 Nov 2024 21:59:08 +0800 Subject: [PATCH 01/30] v1 tmp --- src/lib.rs | 1 + src/parser/mod.rs | 176 ++++++++++++++++++++++++++++---------- tests/sqlparser_common.rs | 23 +++++ 3 files changed, 153 insertions(+), 47 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6c8987b63..273f4dec1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -81,6 +81,7 @@ extern crate alloc; #[macro_use] #[cfg(test)] extern crate pretty_assertions; +extern crate core; pub mod ast; #[macro_use] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1bf173169..f64c52202 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -24,7 +24,6 @@ use core::{ fmt::{self, Display}, str::FromStr, }; - use log::debug; use recursion::RecursionCounter; @@ -956,6 +955,7 @@ impl<'a> Parser<'a> { /// Parse tokens until the precedence changes. pub fn parse_subexpr(&mut self, precedence: u8) -> Result { let _guard = self.recursion_counter.try_decrease()?; + debug!("precedence: {}", precedence); debug!("parsing expr"); let mut expr = self.parse_prefix()?; debug!("prefix: {:?}", expr); @@ -1174,53 +1174,23 @@ impl<'a> Parser<'a> { // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { - Token::LParen | Token::Period => { - let mut id_parts: Vec = vec![w.to_ident()]; - let mut ends_with_wildcard = false; - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident()), - Token::Mul => { - // Postgres explicitly allows funcnm(tablenm.*) and the - // function array_agg traverses this control flow - if dialect_of!(self is PostgreSqlDialect) { - ends_with_wildcard = true; - break; - } else { - return self - .expected("an identifier after '.'", next_token); - } - } - Token::SingleQuotedString(s) => { - id_parts.push(Ident::with_quote('\'', s)) - } - _ => { - return self - .expected("an identifier or a '*' after '.'", next_token); - } - } - } - - if ends_with_wildcard { - Ok(Expr::QualifiedWildcard(ObjectName(id_parts))) - } else if self.consume_token(&Token::LParen) { - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) - } else { - self.prev_token(); - self.parse_function(ObjectName(id_parts)) - } - } else { - Ok(Expr::CompoundIdentifier(id_parts)) + Token::Period => { + self.parse_compound_expr(Expr::Identifier(w.to_ident())) + }, + Token::LParen => { + let id_parts = vec![w.to_ident()]; + let mut expr = self.parse_function(ObjectName(id_parts))?; + // if the function returns an array, it can be subscripted + if self.consume_token(&Token::LBracket) { + expr = self.parse_subscript(expr)?; } + self.parse_compound_expr(expr) + }, + Token::LBracket => { + let _ = self.consume_token(&Token::LBracket); + let ident = Expr::Identifier(w.to_ident()); + let subscript = self.parse_subscript(ident)?; + self.parse_compound_expr(subscript) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html Token::SingleQuotedString(_) @@ -1373,6 +1343,118 @@ impl<'a> Parser<'a> { } } + pub fn parse_compound_expr(&mut self, init: Expr) -> Result{ + let mut expressions: Vec = vec![init]; + let mut ends_with_wildcard = false; + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => { + let mut expr = Expr::Identifier(w.to_ident()); + if self.consume_token(&Token::LBracket) { + expr = self.parse_subscript(expr)?; + } + expressions.push(expr) + }, + Token::Mul => { + // Postgres explicitly allows funcnm(tablenm.*) and the + // function array_agg traverses this control flow + if dialect_of!(self is PostgreSqlDialect) { + ends_with_wildcard = true; + break; + } else { + return self + .expected("an identifier after '.'", next_token); + } + }, + Token::SingleQuotedString(s) => { + expressions.push(Expr::Identifier(Ident::with_quote('\'', s))) + }, + _ => { + return self + .expected("an identifier or a '*' after '.'", next_token); + } + } + } + + dbg!(&expressions); + + if ends_with_wildcard { + let id_parts = Self::exprs_to_idents(&expressions); + if id_parts.len() != expressions.len() { + return self.expected("an identifier or a '*' after '.'", self.peek_token()); + } + let id_parts = id_parts.into_iter().cloned().collect::>(); + Ok(Expr::QualifiedWildcard(ObjectName(id_parts))) + } else if self.consume_token(&Token::LParen) { + let id_parts = Self::exprs_to_idents(&expressions); + if id_parts.len() != expressions.len() { + return self.expected("an identifier or a '*' after '.'", self.peek_token()); + } + let id_parts: Vec = id_parts.into_iter().cloned().collect::>(); + if dialect_of!(self is SnowflakeDialect | MsSqlDialect) + && self.consume_tokens(&[Token::Plus, Token::RParen]) + { + Ok(Expr::OuterJoin(Box::new( + match <[Ident; 1]>::try_from(id_parts) { + Ok([ident]) => Expr::Identifier(ident), + Err(parts) => Expr::CompoundIdentifier(parts), + }, + ))) + } else { + self.prev_token(); + self.parse_function(ObjectName(id_parts)) + } + } else { + let id_parts = Self::exprs_to_idents(&expressions); + if id_parts.len() == expressions.len() { + let id_parts: Vec = id_parts.into_iter().cloned().collect::>(); + return Ok(Expr::CompoundIdentifier(id_parts)); + } + self.exprs_to_composite_access(&expressions) + } + } + + fn exprs_to_idents(exprs: &[Expr]) -> Vec<&Ident> { + exprs.iter().filter_map(|expr| { + if let Expr::Identifier(ident) = expr { + Some(ident) + } + else { + None + } + }).collect() + } + + fn exprs_to_composite_access(&self, exprs: &[Expr]) -> Result { + let head = exprs.first().expect("at least one expr").clone(); + let remain = exprs[1..exprs.len()].to_vec(); + Ok(remain.iter().fold(head, |acc, e| { + match e { + Expr::Identifier(ident) => { + Expr::CompositeAccess { + expr: Box::new(acc), + key: ident.clone(), + } + } + Expr::Subscript { expr, subscript } => { + let key = Expr::CompositeAccess { + expr: Box::new(acc), + key: match expr.as_ref() { + Expr::Identifier(ident) => ident.clone(), + _ => unreachable!(), + }, + }; + Expr::Subscript { + expr: Box::new(key), + subscript: subscript.clone(), + } + } + _ => unreachable!(), + } + })) + } + pub fn parse_utility_options(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Self::parse_utility_option)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b41063859..ff691c562 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2820,6 +2820,29 @@ fn parse_window_function_null_treatment_arg() { ); } +#[test] +fn test_compound_expr() { + let supported_dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), + Box::new(BigQueryDialect {}), + ]); + let sqls = [ + "SELECT abc[1].f1 FROM t", + "SELECT abc[1].f1.f2 FROM t", + "SELECT f1.abc[1] FROM t", + "SELECT f1.f2.abc[1] FROM t", + "SELECT f1.abc[1].f2 FROM t", + "SELECT named_struct('a', 1, 'b', 2).a", + "SELECT named_struct('a', 1, 'b', 2).a", + "SELECT make_array(1, 2, 3)[1]", + "SELECT make_array(named_struct('a', 1))[1].a", + ]; + for sql in sqls { + supported_dialects.verified_stmt(sql); + } +} + #[test] fn parse_negative_value() { let sql1 = "SELECT -1"; From 6be3c357ced7253aaa1b121503af0cf611adab92 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 26 Nov 2024 00:24:44 +0800 Subject: [PATCH 02/30] remove MapAccess --- src/ast/mod.rs | 45 ---------- src/parser/mod.rs | 155 ++++++++++++++++------------------ tests/sqlparser_bigquery.rs | 49 ++++++----- tests/sqlparser_clickhouse.rs | 24 +++--- tests/sqlparser_common.rs | 25 +++--- 5 files changed, 126 insertions(+), 172 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9185c9df4..7a215d7d2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -387,40 +387,6 @@ pub enum CastFormat { ValueAtTimeZone(Value, Value), } -/// Represents the syntax/style used in a map access. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MapAccessSyntax { - /// Access using bracket notation. `mymap[mykey]` - Bracket, - /// Access using period notation. `mymap.mykey` - Period, -} - -/// Expression used to access a value in a nested structure. -/// -/// Example: `SAFE_OFFSET(0)` in -/// ```sql -/// SELECT mymap[SAFE_OFFSET(0)]; -/// ``` -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MapAccessKey { - pub key: Expr, - pub syntax: MapAccessSyntax, -} - -impl fmt::Display for MapAccessKey { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.syntax { - MapAccessSyntax::Bracket => write!(f, "[{}]", self.key), - MapAccessSyntax::Period => write!(f, ".{}", self.key), - } - } -} - /// An element of a JSON path. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -798,14 +764,6 @@ pub enum Expr { data_type: DataType, value: String, }, - /// Access a map-like object by field (e.g. `column['field']` or `column[4]` - /// Note that depending on the dialect, struct like accesses may be - /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess) - /// - MapAccess { - column: Box, - keys: Vec, - }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// Arbitrary expr method call @@ -1208,9 +1166,6 @@ impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{s}"), - Expr::MapAccess { column, keys } => { - write!(f, "{column}{}", display_separated(keys, "")) - } Expr::Wildcard => f.write_str("*"), Expr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f64c52202..a13689738 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1182,14 +1182,14 @@ impl<'a> Parser<'a> { let mut expr = self.parse_function(ObjectName(id_parts))?; // if the function returns an array, it can be subscripted if self.consume_token(&Token::LBracket) { - expr = self.parse_subscript(expr)?; + expr = self.parse_multi_dim_subscript(expr)?; } self.parse_compound_expr(expr) }, Token::LBracket => { let _ = self.consume_token(&Token::LBracket); let ident = Expr::Identifier(w.to_ident()); - let subscript = self.parse_subscript(ident)?; + let subscript = self.parse_multi_dim_subscript(ident)?; self.parse_compound_expr(subscript) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html @@ -1343,19 +1343,22 @@ impl<'a> Parser<'a> { } } - pub fn parse_compound_expr(&mut self, init: Expr) -> Result{ + pub fn parse_compound_expr(&mut self, init: Expr) -> Result { let mut expressions: Vec = vec![init]; let mut ends_with_wildcard = false; while self.consume_token(&Token::Period) { let next_token = self.next_token(); match next_token.token { Token::Word(w) => { - let mut expr = Expr::Identifier(w.to_ident()); - if self.consume_token(&Token::LBracket) { - expr = self.parse_subscript(expr)?; + let expr = if self.consume_token(&Token::LBracket) { + let expr = Expr::Identifier(w.to_ident()); + self.parse_multi_dim_subscript(expr)? } + else { + Expr::Identifier(w.to_ident()) + }; expressions.push(expr) - }, + } Token::Mul => { // Postgres explicitly allows funcnm(tablenm.*) and the // function array_agg traverses this control flow @@ -1363,22 +1366,18 @@ impl<'a> Parser<'a> { ends_with_wildcard = true; break; } else { - return self - .expected("an identifier after '.'", next_token); + return self.expected("an identifier after '.'", next_token); } - }, + } Token::SingleQuotedString(s) => { expressions.push(Expr::Identifier(Ident::with_quote('\'', s))) - }, + } _ => { - return self - .expected("an identifier or a '*' after '.'", next_token); + return self.expected("an identifier or a '*' after '.'", next_token); } } } - dbg!(&expressions); - if ends_with_wildcard { let id_parts = Self::exprs_to_idents(&expressions); if id_parts.len() != expressions.len() { @@ -1416,42 +1415,40 @@ impl<'a> Parser<'a> { } fn exprs_to_idents(exprs: &[Expr]) -> Vec<&Ident> { - exprs.iter().filter_map(|expr| { - if let Expr::Identifier(ident) = expr { - Some(ident) - } - else { - None - } - }).collect() + exprs + .iter() + .filter_map(|expr| { + if let Expr::Identifier(ident) = expr { + Some(ident) + } else { + None + } + }) + .collect() } fn exprs_to_composite_access(&self, exprs: &[Expr]) -> Result { let head = exprs.first().expect("at least one expr").clone(); let remain = exprs[1..exprs.len()].to_vec(); - Ok(remain.iter().fold(head, |acc, e| { - match e { - Expr::Identifier(ident) => { - Expr::CompositeAccess { - expr: Box::new(acc), - key: ident.clone(), - } - } - Expr::Subscript { expr, subscript } => { - let key = Expr::CompositeAccess { - expr: Box::new(acc), - key: match expr.as_ref() { - Expr::Identifier(ident) => ident.clone(), - _ => unreachable!(), - }, - }; - Expr::Subscript { - expr: Box::new(key), - subscript: subscript.clone(), - } + Ok(remain.iter().fold(head, |acc, e| match e { + Expr::Identifier(ident) => Expr::CompositeAccess { + expr: Box::new(acc), + key: ident.clone(), + }, + Expr::Subscript { expr, subscript } => { + let key = Expr::CompositeAccess { + expr: Box::new(acc), + key: match expr.as_ref() { + Expr::Identifier(ident) => ident.clone(), + _ => unreachable!(), + }, + }; + Expr::Subscript { + expr: Box::new(key), + subscript: subscript.clone(), } - _ => unreachable!(), } + _ => unreachable!(), })) } @@ -3016,8 +3013,9 @@ impl<'a> Parser<'a> { expr: Box::new(expr), }) } else if Token::LBracket == tok { - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - self.parse_subscript(expr) + if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) + { + self.parse_multi_dim_subscript(expr) } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { self.prev_token(); self.parse_json_access(expr) @@ -3118,6 +3116,19 @@ impl<'a> Parser<'a> { }) } + /// Parse an multi-dimension array accessing like `[1:3][1][1]` + /// + /// Parser is right after the first `[` + pub fn parse_multi_dim_subscript(&mut self, mut expr: Expr) -> Result { + loop { + expr = self.parse_subscript(expr)?; + if !self.consume_token(&Token::LBracket) { + break; + } + } + Ok(expr) + } + /// Parses an array subscript like `[1:3]` /// /// Parser is right after `[` @@ -3190,42 +3201,24 @@ impl<'a> Parser<'a> { pub fn parse_map_access(&mut self, expr: Expr) -> Result { let key = self.parse_expr()?; + dbg!(&key); + let result = match key { + Expr::Identifier(ident) => Ok(Expr::CompositeAccess { + expr: Box::new(expr), + key: ident, + }), + Expr::Value(Value::SingleQuotedString(s)) + | Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompositeAccess { + expr: Box::new(expr), + key: Ident::new(s), + }), + _ => parser_err!( + "Expected identifier or string literal", + self.peek_token().location + ), + }; self.expect_token(&Token::RBracket)?; - - let mut keys = vec![MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - }]; - loop { - let key = match self.peek_token().token { - Token::LBracket => { - self.next_token(); // consume `[` - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - } - } - // Access on BigQuery nested and repeated expressions can - // mix notations in the same expression. - // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns - Token::Period if dialect_of!(self is BigQueryDialect) => { - self.next_token(); // consume `.` - MapAccessKey { - key: self.parse_expr()?, - syntax: MapAccessSyntax::Period, - } - } - _ => break, - }; - keys.push(key); - } - - Ok(Expr::MapAccess { - column: Box::new(expr), - keys, - }) + result } /// Parses the parens following the `[ NOT ] IN` operator. diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d4c178bbf..0cc36b2dc 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1956,29 +1956,36 @@ fn parse_map_access_expr() { let sql = "users[-1][safe_offset(2)].a.b"; let expr = bigquery().verified_expr(sql); - fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey { - MapAccessKey { key, syntax } + fn composite_access(expr: Expr, key: impl Into) -> Expr { + Expr::CompositeAccess { + expr: Box::new(expr), + key: key.into(), + } } - let expected = Expr::MapAccess { - column: Expr::Identifier(Ident::new("users")).into(), - keys: vec![ - map_access_key( - Expr::UnaryOp { - op: UnaryOperator::Minus, - expr: Expr::Value(number("1")).into(), - }, - MapAccessSyntax::Bracket, - ), - map_access_key( - call("safe_offset", [Expr::Value(number("2"))]), - MapAccessSyntax::Bracket, - ), - map_access_key( - Expr::CompoundIdentifier(vec![Ident::new("a"), Ident::new("b")]), - MapAccessSyntax::Period, + + fn subscript(expr: Expr, index: Expr) -> Expr { + Expr::Subscript { + expr: Box::new(expr), + subscript: Box::new(Subscript::Index { index }), + } + } + + let expected = composite_access( + composite_access( + subscript( + subscript( + Expr::Identifier("users".into()), + Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(Expr::Value(number("1"))), + }, + ), + call("safe_offset", vec![Expr::Value(number("2"))]), ), - ], - }; + "a", + ), + "b", + ); assert_eq!(expr, expected); let sql = "SELECT myfunc()[-1].a[SAFE_OFFSET(2)].b"; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 90af12ab7..8a538e7fe 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -23,7 +23,7 @@ mod test_utils; use test_utils::*; -use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess}; +use sqlparser::ast::Expr::{BinaryOp, Identifier}; use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Table; use sqlparser::ast::Value::Number; @@ -41,21 +41,20 @@ fn parse_map_access_expr() { distinct: None, top: None, top_before_distinct: false, - projection: vec![UnnamedExpr(MapAccess { - column: Box::new(Identifier(Ident { + projection: vec![UnnamedExpr(Expr::Subscript { + expr: Box::new(Identifier(Ident { value: "string_values".to_string(), quote_style: None, })), - keys: vec![MapAccessKey { - key: call( + subscript: Box::new(Subscript::Index { + index: call( "indexOf", [ Expr::Identifier(Ident::new("string_names")), Expr::Value(Value::SingleQuotedString("endpoint".to_string())) ] ), - syntax: MapAccessSyntax::Bracket - }], + }), })], into: None, from: vec![TableWithJoins { @@ -81,18 +80,17 @@ fn parse_map_access_expr() { }), op: BinaryOperator::And, right: Box::new(BinaryOp { - left: Box::new(MapAccess { - column: Box::new(Identifier(Ident::new("string_value"))), - keys: vec![MapAccessKey { - key: call( + left: Box::new(Expr::Subscript { + expr: Box::new(Identifier(Ident::new("string_value"))), + subscript: Box::new(Subscript::Index { + index: call( "indexOf", [ Expr::Identifier(Ident::new("string_name")), Expr::Value(Value::SingleQuotedString("app".to_string())) ] ), - syntax: MapAccessSyntax::Bracket - }], + }), }), op: BinaryOperator::NotEq, right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ff691c562..31b0d5f3b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2822,6 +2822,7 @@ fn parse_window_function_null_treatment_arg() { #[test] fn test_compound_expr() { + env_logger::init(); let supported_dialects = TestedDialects::new(vec![ Box::new(GenericDialect {}), Box::new(DuckDbDialect {}), @@ -2837,6 +2838,8 @@ fn test_compound_expr() { "SELECT named_struct('a', 1, 'b', 2).a", "SELECT make_array(1, 2, 3)[1]", "SELECT make_array(named_struct('a', 1))[1].a", + "SELECT abc[1][-1].a.b FROM t", + "SELECT abc[1][-1].a.b[1] FROM t", ]; for sql in sqls { supported_dialects.verified_stmt(sql); @@ -10103,21 +10106,19 @@ fn parse_map_access_expr() { Box::new(ClickHouseDialect {}), ]); let expr = dialects.verified_expr(sql); - let expected = Expr::MapAccess { - column: Expr::Identifier(Ident::new("users")).into(), - keys: vec![ - MapAccessKey { - key: Expr::UnaryOp { + let expected = Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("users"))), + subscript: Box::new(Subscript::Index { + index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, - syntax: MapAccessSyntax::Bracket, - }, - MapAccessKey { - key: call("safe_offset", [Expr::Value(number("2"))]), - syntax: MapAccessSyntax::Bracket, - }, - ], + }), + }), + subscript: Box::new(Subscript::Index { + index: call("safe_offset", [Expr::Value(number("2"))]), + }), }; assert_eq!(expr, expected); From 0e916ddc5a53540ab62c8052740b614eb932c666 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 26 Nov 2024 00:39:43 +0800 Subject: [PATCH 03/30] fix fmt --- src/parser/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a13689738..764d7904e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1353,8 +1353,7 @@ impl<'a> Parser<'a> { let expr = if self.consume_token(&Token::LBracket) { let expr = Expr::Identifier(w.to_ident()); self.parse_multi_dim_subscript(expr)? - } - else { + } else { Expr::Identifier(w.to_ident()) }; expressions.push(expr) From 767b531ad8adfe6e79e054d95c8b722807c1c654 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 26 Nov 2024 00:44:38 +0800 Subject: [PATCH 04/30] remove debug message --- src/parser/mod.rs | 1 - tests/sqlparser_common.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 764d7904e..99544bb1b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3200,7 +3200,6 @@ impl<'a> Parser<'a> { pub fn parse_map_access(&mut self, expr: Expr) -> Result { let key = self.parse_expr()?; - dbg!(&key); let result = match key { Expr::Identifier(ident) => Ok(Expr::CompositeAccess { expr: Box::new(expr), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 31b0d5f3b..6a0b56fc5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2822,7 +2822,6 @@ fn parse_window_function_null_treatment_arg() { #[test] fn test_compound_expr() { - env_logger::init(); let supported_dialects = TestedDialects::new(vec![ Box::new(GenericDialect {}), Box::new(DuckDbDialect {}), From dee8b4079c398939a5d2efbaf1f7d077eef4f1bd Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 27 Nov 2024 22:12:48 +0800 Subject: [PATCH 05/30] fix span test --- src/ast/spans.rs | 3 -- src/parser/mod.rs | 74 ++++++++++------------------------------------- 2 files changed, 15 insertions(+), 62 deletions(-) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 8e8c7b14a..abe012530 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1276,9 +1276,6 @@ impl Spanned for Expr { Expr::Nested(expr) => expr.span(), Expr::Value(value) => value.span(), Expr::TypedString { .. } => Span::empty(), - Expr::MapAccess { column, keys } => column - .span() - .union(&union_spans(keys.iter().map(|i| i.key.span()))), Expr::Function(function) => function.span(), Expr::GroupingSets(vec) => { union_spans(vec.iter().flat_map(|i| i.iter().map(|k| k.span()))) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1fb7f6e69..027b07697 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1147,10 +1147,10 @@ impl<'a> Parser<'a> { ) -> Result { match self.peek_token().token { Token::Period => { - self.parse_compound_expr(Expr::Identifier(w.to_ident())) + self.parse_compound_expr(Expr::Identifier(w.to_ident(w_span))) }, Token::LParen => { - let id_parts = vec![w.to_ident()]; + let id_parts = vec![w.to_ident(w_span)]; let mut expr = self.parse_function(ObjectName(id_parts))?; // if the function returns an array, it can be subscripted if self.consume_token(&Token::LBracket) { @@ -1160,7 +1160,7 @@ impl<'a> Parser<'a> { }, Token::LBracket => { let _ = self.consume_token(&Token::LBracket); - let ident = Expr::Identifier(w.to_ident()); + let ident = Expr::Identifier(w.to_ident(w_span)); let subscript = self.parse_multi_dim_subscript(ident)?; self.parse_compound_expr(subscript) } @@ -1171,57 +1171,10 @@ impl<'a> Parser<'a> { if w.value.starts_with('_') => { Ok(Expr::IntroducedString { - introducer: w.value, + introducer: w.value.clone(), value: self.parse_introduced_string_value()?, }) } - Token::LParen | Token::Period => { - let mut id_parts: Vec = vec![w.to_ident(w_span)]; - let mut ending_wildcard: Option = None; - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), - Token::Mul => { - // Postgres explicitly allows funcnm(tablenm.*) and the - // function array_agg traverses this control flow - if dialect_of!(self is PostgreSqlDialect) { - ending_wildcard = Some(next_token); - break; - } else { - return self.expected("an identifier after '.'", next_token); - } - } - Token::SingleQuotedString(s) => id_parts.push(Ident::with_quote('\'', s)), - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); - } - } - } - - if let Some(wildcard_token) = ending_wildcard { - Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), - AttachedToken(wildcard_token), - )) - } else if self.consume_token(&Token::LParen) { - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) - } else { - self.prev_token(); - self.parse_function(ObjectName(id_parts)) - } - } else { - Ok(Expr::CompoundIdentifier(id_parts)) - } - } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) @@ -1460,16 +1413,16 @@ impl<'a> Parser<'a> { pub fn parse_compound_expr(&mut self, init: Expr) -> Result { let mut expressions: Vec = vec![init]; - let mut ends_with_wildcard = false; + let mut ending_wildcard: Option = None; while self.consume_token(&Token::Period) { let next_token = self.next_token(); match next_token.token { Token::Word(w) => { let expr = if self.consume_token(&Token::LBracket) { - let expr = Expr::Identifier(w.to_ident()); + let expr = Expr::Identifier(w.to_ident(next_token.span)); self.parse_multi_dim_subscript(expr)? } else { - Expr::Identifier(w.to_ident()) + Expr::Identifier(w.to_ident(next_token.span)) }; expressions.push(expr) } @@ -1477,7 +1430,7 @@ impl<'a> Parser<'a> { // Postgres explicitly allows funcnm(tablenm.*) and the // function array_agg traverses this control flow if dialect_of!(self is PostgreSqlDialect) { - ends_with_wildcard = true; + ending_wildcard = Some(next_token); break; } else { return self.expected("an identifier after '.'", next_token); @@ -1492,19 +1445,22 @@ impl<'a> Parser<'a> { } } - if ends_with_wildcard { + if let Some(wildcard_token) = ending_wildcard { let id_parts = Self::exprs_to_idents(&expressions); if id_parts.len() != expressions.len() { return self.expected("an identifier or a '*' after '.'", self.peek_token()); } let id_parts = id_parts.into_iter().cloned().collect::>(); - Ok(Expr::QualifiedWildcard(ObjectName(id_parts))) + Ok(Expr::QualifiedWildcard( + ObjectName(id_parts), + AttachedToken(wildcard_token), + )) } else if self.consume_token(&Token::LParen) { let id_parts = Self::exprs_to_idents(&expressions); if id_parts.len() != expressions.len() { return self.expected("an identifier or a '*' after '.'", self.peek_token()); } - let id_parts: Vec = id_parts.into_iter().cloned().collect::>(); + let id_parts = id_parts.into_iter().cloned().collect::>(); if dialect_of!(self is SnowflakeDialect | MsSqlDialect) && self.consume_tokens(&[Token::Plus, Token::RParen]) { @@ -3330,7 +3286,7 @@ impl<'a> Parser<'a> { }), _ => parser_err!( "Expected identifier or string literal", - self.peek_token().location + self.peek_token() ), }; self.expect_token(&Token::RBracket)?; From fc1cd59650736127f946ce386edd3e670f0c8052 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 4 Dec 2024 21:33:05 +0800 Subject: [PATCH 06/30] introduce CompoundExpr --- src/ast/mod.rs | 35 ++++++--- src/ast/spans.rs | 15 +++- src/parser/mod.rs | 157 ++++++++++++++++++-------------------- tests/sqlparser_common.rs | 78 ++++++++++++------- 4 files changed, 159 insertions(+), 126 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c2b558d06..284ea3420 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -577,6 +577,12 @@ pub enum Expr { Identifier(Ident), /// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col` CompoundIdentifier(Vec), + /// Multi-part Expression accessing. It's used to represent a access chain from a root expression. + /// e.g. `expr[0]`, `expr[0][0]`, or `expr.field1.filed2[1].field3`, ... + CompoundExpr { + root: Box, + chain: Vec, + }, /// Access data nested in a value containing semi-structured data, such as /// the `VARIANT` type on Snowflake. for example `src:customer[0].name`. /// @@ -916,11 +922,6 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps Map(Map), - /// An access of nested data using subscript syntax, for example `array[2]`. - Subscript { - expr: Box, - subscript: Box, - }, /// An array expression e.g. `ARRAY[1, 2]` Array(Array), /// An interval expression e.g. `INTERVAL '1' YEAR` @@ -1037,6 +1038,14 @@ impl fmt::Display for Subscript { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AccessField { + Expr(Expr), + SubScript(Subscript), +} + /// A lambda function. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1235,6 +1244,16 @@ impl fmt::Display for Expr { Expr::Wildcard(_) => f.write_str("*"), Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), + Expr::CompoundExpr { root, chain } => { + write!(f, "{}", root)?; + for field in chain { + match field { + AccessField::Expr(expr) => write!(f, ".{}", expr)?, + AccessField::SubScript(subscript) => write!(f, "[{}]", subscript)?, + } + } + Ok(()) + } Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"), Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"), Expr::IsFalse(ast) => write!(f, "{ast} IS FALSE"), @@ -1654,12 +1673,6 @@ impl fmt::Display for Expr { Expr::Map(map) => { write!(f, "{map}") } - Expr::Subscript { - expr, - subscript: key, - } => { - write!(f, "{expr}[{key}]") - } Expr::Array(set) => { write!(f, "{set}") } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index abe012530..cd447fe23 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -3,7 +3,7 @@ use core::iter; use crate::tokenizer::Span; use super::{ - AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Array, Assignment, + AccessField, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, Expr, @@ -1202,6 +1202,9 @@ impl Spanned for Expr { Expr::Identifier(ident) => ident.span, Expr::CompoundIdentifier(vec) => union_spans(vec.iter().map(|i| i.span)), Expr::CompositeAccess { expr, key } => expr.span().union(&key.span), + Expr::CompoundExpr { root, chain } => { + union_spans(iter::once(root.span()).chain(chain.iter().map(|i| i.span()))) + } Expr::IsFalse(expr) => expr.span(), Expr::IsNotFalse(expr) => expr.span(), Expr::IsTrue(expr) => expr.span(), @@ -1371,7 +1374,6 @@ impl Spanned for Expr { Expr::Named { .. } => Span::empty(), Expr::Dictionary(_) => Span::empty(), Expr::Map(_) => Span::empty(), - Expr::Subscript { expr, subscript } => expr.span().union(&subscript.span()), Expr::Interval(interval) => interval.value.span(), Expr::Wildcard(token) => token.0.span, Expr::QualifiedWildcard(object_name, token) => union_spans( @@ -1410,6 +1412,15 @@ impl Spanned for Subscript { } } +impl Spanned for AccessField { + fn span(&self) -> Span { + match self { + AccessField::Expr(ident) => ident.span(), + AccessField::SubScript(subscript) => subscript.span(), + } + } +} + impl Spanned for ObjectName { fn span(&self) -> Span { let ObjectName(segments) = self; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 027b07697..0c8ff0604 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1146,35 +1146,40 @@ impl<'a> Parser<'a> { w_span: Span, ) -> Result { match self.peek_token().token { - Token::Period => { - self.parse_compound_expr(Expr::Identifier(w.to_ident(w_span))) - }, + Token::Period => self.parse_compound_expr(Expr::Identifier(w.to_ident(w_span)), vec![]), Token::LParen => { let id_parts = vec![w.to_ident(w_span)]; let mut expr = self.parse_function(ObjectName(id_parts))?; + // consume all period if it's a method chain + if self.dialect.supports_methods() { + expr = self.try_parse_method(expr)? + } + let mut fields = vec![]; // if the function returns an array, it can be subscripted if self.consume_token(&Token::LBracket) { - expr = self.parse_multi_dim_subscript(expr)?; + self.parse_multi_dim_subscript(&mut fields)?; } - self.parse_compound_expr(expr) - }, + self.parse_compound_expr(expr, fields) + } Token::LBracket => { let _ = self.consume_token(&Token::LBracket); let ident = Expr::Identifier(w.to_ident(w_span)); - let subscript = self.parse_multi_dim_subscript(ident)?; - self.parse_compound_expr(subscript) + let mut fields = vec![]; + self.parse_multi_dim_subscript(&mut fields)?; + dbg!(&fields); + self.parse_compound_expr(ident, fields) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) | Token::HexStringLiteral(_) - if w.value.starts_with('_') => - { - Ok(Expr::IntroducedString { - introducer: w.value.clone(), - value: self.parse_introduced_string_value()?, - }) - } + if w.value.starts_with('_') => + { + Ok(Expr::IntroducedString { + introducer: w.value.clone(), + value: self.parse_introduced_string_value()?, + }) + } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) @@ -1411,20 +1416,21 @@ impl<'a> Parser<'a> { } } - pub fn parse_compound_expr(&mut self, init: Expr) -> Result { - let mut expressions: Vec = vec![init]; + pub fn parse_compound_expr( + &mut self, + root: Expr, + mut chain: Vec, + ) -> Result { let mut ending_wildcard: Option = None; while self.consume_token(&Token::Period) { let next_token = self.next_token(); match next_token.token { Token::Word(w) => { - let expr = if self.consume_token(&Token::LBracket) { - let expr = Expr::Identifier(w.to_ident(next_token.span)); - self.parse_multi_dim_subscript(expr)? - } else { - Expr::Identifier(w.to_ident(next_token.span)) + let expr = Expr::Identifier(w.to_ident(next_token.span)); + chain.push(AccessField::Expr(expr)); + if self.consume_token(&Token::LBracket) && !self.dialect.supports_partiql() { + self.parse_multi_dim_subscript(&mut chain)? }; - expressions.push(expr) } Token::Mul => { // Postgres explicitly allows funcnm(tablenm.*) and the @@ -1437,7 +1443,8 @@ impl<'a> Parser<'a> { } } Token::SingleQuotedString(s) => { - expressions.push(Expr::Identifier(Ident::with_quote('\'', s))) + let expr = Expr::Identifier(Ident::with_quote('\'', s)); + chain.push(AccessField::Expr(expr)); } _ => { return self.expected("an identifier or a '*' after '.'", next_token); @@ -1446,21 +1453,17 @@ impl<'a> Parser<'a> { } if let Some(wildcard_token) = ending_wildcard { - let id_parts = Self::exprs_to_idents(&expressions); - if id_parts.len() != expressions.len() { + let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else { return self.expected("an identifier or a '*' after '.'", self.peek_token()); - } - let id_parts = id_parts.into_iter().cloned().collect::>(); + }; Ok(Expr::QualifiedWildcard( ObjectName(id_parts), AttachedToken(wildcard_token), )) } else if self.consume_token(&Token::LParen) { - let id_parts = Self::exprs_to_idents(&expressions); - if id_parts.len() != expressions.len() { + let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else { return self.expected("an identifier or a '*' after '.'", self.peek_token()); - } - let id_parts = id_parts.into_iter().cloned().collect::>(); + }; if dialect_of!(self is SnowflakeDialect | MsSqlDialect) && self.consume_tokens(&[Token::Plus, Token::RParen]) { @@ -1475,51 +1478,33 @@ impl<'a> Parser<'a> { self.parse_function(ObjectName(id_parts)) } } else { - let id_parts = Self::exprs_to_idents(&expressions); - if id_parts.len() == expressions.len() { - let id_parts: Vec = id_parts.into_iter().cloned().collect::>(); + if let Some(id_parts) = Self::exprs_to_idents(&root, &chain) { return Ok(Expr::CompoundIdentifier(id_parts)); } - self.exprs_to_composite_access(&expressions) - } - } - - fn exprs_to_idents(exprs: &[Expr]) -> Vec<&Ident> { - exprs - .iter() - .filter_map(|expr| { - if let Expr::Identifier(ident) = expr { - Some(ident) - } else { - None - } + if chain.is_empty() { + return Ok(root); + } + Ok(Expr::CompoundExpr { + root: Box::new(root), + chain: chain.clone(), }) - .collect() + } } - fn exprs_to_composite_access(&self, exprs: &[Expr]) -> Result { - let head = exprs.first().expect("at least one expr").clone(); - let remain = exprs[1..exprs.len()].to_vec(); - Ok(remain.iter().fold(head, |acc, e| match e { - Expr::Identifier(ident) => Expr::CompositeAccess { - expr: Box::new(acc), - key: ident.clone(), - }, - Expr::Subscript { expr, subscript } => { - let key = Expr::CompositeAccess { - expr: Box::new(acc), - key: match expr.as_ref() { - Expr::Identifier(ident) => ident.clone(), - _ => unreachable!(), - }, - }; - Expr::Subscript { - expr: Box::new(key), - subscript: subscript.clone(), - } + fn exprs_to_idents(root: &Expr, fields: &[AccessField]) -> Option> { + let mut idents = vec![]; + let Expr::Identifier(root) = root else { + return None; + }; + idents.push(root.clone()); + for x in fields { + if let AccessField::Expr(Expr::Identifier(ident)) = x { + idents.push(ident.clone()) + } else { + return None; } - _ => unreachable!(), - })) + } + Some(idents) } pub fn parse_utility_options(&mut self) -> Result, ParserError> { @@ -3088,7 +3073,12 @@ impl<'a> Parser<'a> { } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) { - self.parse_multi_dim_subscript(expr) + let mut chain = vec![]; + self.parse_multi_dim_subscript(&mut chain)?; + Ok(Expr::CompoundExpr { + root: Box::new(expr), + chain, + }) } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { self.prev_token(); self.parse_json_access(expr) @@ -3192,25 +3182,27 @@ impl<'a> Parser<'a> { /// Parse an multi-dimension array accessing like `[1:3][1][1]` /// /// Parser is right after the first `[` - pub fn parse_multi_dim_subscript(&mut self, mut expr: Expr) -> Result { + pub fn parse_multi_dim_subscript( + &mut self, + chain: &mut Vec, + ) -> Result<(), ParserError> { loop { - expr = self.parse_subscript(expr)?; + self.parse_subscript(chain)?; if !self.consume_token(&Token::LBracket) { break; } } - Ok(expr) + Ok(()) } /// Parses an array subscript like `[1:3]` /// /// Parser is right after `[` - pub fn parse_subscript(&mut self, expr: Expr) -> Result { + pub fn parse_subscript(&mut self, chain: &mut Vec) -> Result<(), ParserError> { let subscript = self.parse_subscript_inner()?; - Ok(Expr::Subscript { - expr: Box::new(expr), - subscript: Box::new(subscript), - }) + dbg!(&subscript); + chain.push(AccessField::SubScript(subscript)); + Ok(()) } fn parse_json_path_object_key(&mut self) -> Result { @@ -3284,10 +3276,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), key: Ident::new(s), }), - _ => parser_err!( - "Expected identifier or string literal", - self.peek_token() - ), + _ => parser_err!("Expected identifier or string literal", self.peek_token()), }; self.expect_token(&Token::RBracket)?; result diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1822f3925..91ef471f2 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -37,8 +37,8 @@ use sqlparser::dialect::{ }; use sqlparser::keywords::{Keyword, ALL_KEYWORDS}; use sqlparser::parser::{Parser, ParserError, ParserOptions}; -use sqlparser::tokenizer::Span; use sqlparser::tokenizer::Tokenizer; +use sqlparser::tokenizer::{Location, Span}; use test_utils::{ all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call, expr_from_projection, join, number, only, table, table_alias, TestedDialects, @@ -10324,19 +10324,39 @@ fn parse_map_access_expr() { Box::new(ClickHouseDialect {}), ]); let expr = dialects.verified_expr(sql); - let expected = Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("users"))), - subscript: Box::new(Subscript::Index { + let expected = Expr::CompoundExpr { + root: Box::new(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 1), Location::of(1, 6)), + "users", + ))), + chain: vec![ + AccessField::SubScript(Subscript::Index { index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, }), - }), - subscript: Box::new(Subscript::Index { - index: call("safe_offset", [Expr::Value(number("2"))]), - }), + AccessField::SubScript(Subscript::Index { + index: Expr::Function(Function { + name: ObjectName(vec![Ident::with_span( + Span::new(Location::of(1, 11), Location::of(1, 22)), + "safe_offset", + )]), + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }), + }), + ], }; assert_eq!(expr, expected); @@ -11117,26 +11137,26 @@ fn test_map_syntax() { }), ); - check( - "MAP {'a': 10, 'b': 20}['a']", - Expr::Subscript { - expr: Box::new(Expr::Map(Map { - entries: vec![ - MapEntry { - key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), - value: Box::new(number_expr("10")), - }, - MapEntry { - key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), - value: Box::new(number_expr("20")), - }, - ], - })), - subscript: Box::new(Subscript::Index { - index: Expr::Value(Value::SingleQuotedString("a".to_owned())), - }), - }, - ); + // check( + // "MAP {'a': 10, 'b': 20}['a']", + // Expr::Subscript { + // expr: Box::new(Expr::Map(Map { + // entries: vec![ + // MapEntry { + // key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), + // value: Box::new(number_expr("10")), + // }, + // MapEntry { + // key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), + // value: Box::new(number_expr("20")), + // }, + // ], + // })), + // subscript: Box::new(Subscript::Index { + // index: Expr::Value(Value::SingleQuotedString("a".to_owned())), + // }), + // }, + // ); check("MAP {}", Expr::Map(Map { entries: vec![] })); } From 4ad37d82688ffc81467df8f7b22595f78b8f3b3b Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 4 Dec 2024 21:45:25 +0800 Subject: [PATCH 07/30] fix merge conflict --- src/parser/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 61b7ec7af..c8c57e2a8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1420,7 +1420,7 @@ impl<'a> Parser<'a> { root: Expr, mut chain: Vec, ) -> Result { - let mut ending_wildcard: Option = None; + let mut ending_wildcard: Option = None; while self.consume_token(&Token::Period) { let next_token = self.next_token(); match next_token.token { @@ -3201,7 +3201,6 @@ impl<'a> Parser<'a> { /// Parser is right after `[` pub fn parse_subscript(&mut self, chain: &mut Vec) -> Result<(), ParserError> { let subscript = self.parse_subscript_inner()?; - dbg!(&subscript); chain.push(AccessField::SubScript(subscript)); Ok(()) } From 31a1e749252c3eb30530d867ec1cd0ede0eae430 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 4 Dec 2024 22:06:37 +0800 Subject: [PATCH 08/30] replace subscript with compound expr --- src/parser/mod.rs | 1 - tests/sqlparser_bigquery.rs | 74 ++++++++++++++++------------ tests/sqlparser_clickhouse.rs | 16 +++---- tests/sqlparser_duckdb.rs | 8 ++-- tests/sqlparser_postgres.rs | 90 +++++++++++++++++------------------ 5 files changed, 100 insertions(+), 89 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c8c57e2a8..d32dc863b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1165,7 +1165,6 @@ impl<'a> Parser<'a> { let ident = Expr::Identifier(w.to_ident(w_span)); let mut fields = vec![]; self.parse_multi_dim_subscript(&mut fields)?; - dbg!(&fields); self.parse_compound_expr(ident, fields) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 40345cb29..1ebe95a2b 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -23,7 +23,7 @@ use std::ops::Deref; use sqlparser::ast::*; use sqlparser::dialect::{BigQueryDialect, GenericDialect}; use sqlparser::parser::{ParserError, ParserOptions}; -use sqlparser::tokenizer::Span; +use sqlparser::tokenizer::{Location, Span}; use test_utils::*; #[test] @@ -1969,36 +1969,48 @@ fn parse_map_access_expr() { let sql = "users[-1][safe_offset(2)].a.b"; let expr = bigquery().verified_expr(sql); - fn composite_access(expr: Expr, key: impl Into) -> Expr { - Expr::CompositeAccess { - expr: Box::new(expr), - key: key.into(), - } - } - - fn subscript(expr: Expr, index: Expr) -> Expr { - Expr::Subscript { - expr: Box::new(expr), - subscript: Box::new(Subscript::Index { index }), - } - } - - let expected = composite_access( - composite_access( - subscript( - subscript( - Expr::Identifier("users".into()), - Expr::UnaryOp { - op: UnaryOperator::Minus, - expr: Box::new(Expr::Value(number("1"))), - }, - ), - call("safe_offset", vec![Expr::Value(number("2"))]), - ), - "a", - ), - "b", - ); + let expected = Expr::CompoundExpr { + root: Box::new(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 1), Location::of(1, 6)), + "users", + ))), + chain: vec![ + AccessField::SubScript(Subscript::Index { + index: Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Expr::Value(number("1")).into(), + }, + }), + AccessField::SubScript(Subscript::Index { + index: Expr::Function(Function { + name: ObjectName(vec![Ident::with_span( + Span::new(Location::of(1, 11), Location::of(1, 22)), + "safe_offset", + )]), + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( + number("2"), + )))], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }), + }), + AccessField::Expr(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 24), Location::of(1, 25)), + "a", + ))), + AccessField::Expr(Expr::Identifier(Ident::with_span( + Span::new(Location::of(1, 26), Location::of(1, 27)), + "b", + ))), + ], + }; assert_eq!(expr, expected); let sql = "SELECT myfunc()[-1].a[SAFE_OFFSET(2)].b"; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index f09b33f72..9c8dc7d88 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -44,13 +44,13 @@ fn parse_map_access_expr() { select_token: AttachedToken::empty(), top: None, top_before_distinct: false, - projection: vec![UnnamedExpr(Expr::Subscript { - expr: Box::new(Identifier(Ident { + projection: vec![UnnamedExpr(Expr::CompoundExpr { + root: Box::new(Identifier(Ident { value: "string_values".to_string(), quote_style: None, span: Span::empty(), })), - subscript: Box::new(Subscript::Index { + chain: vec![AccessField::SubScript(Subscript::Index { index: call( "indexOf", [ @@ -58,7 +58,7 @@ fn parse_map_access_expr() { Expr::Value(Value::SingleQuotedString("endpoint".to_string())) ] ), - }), + })], })], into: None, from: vec![TableWithJoins { @@ -84,9 +84,9 @@ fn parse_map_access_expr() { }), op: BinaryOperator::And, right: Box::new(BinaryOp { - left: Box::new(Expr::Subscript { - expr: Box::new(Identifier(Ident::new("string_value"))), - subscript: Box::new(Subscript::Index { + left: Box::new(Expr::CompoundExpr { + root: Box::new(Identifier(Ident::new("string_value"))), + chain: vec![AccessField::SubScript(Subscript::Index { index: call( "indexOf", [ @@ -94,7 +94,7 @@ fn parse_map_access_expr() { Expr::Value(Value::SingleQuotedString("app".to_string())) ] ), - }), + })], }), op: BinaryOperator::NotEq, right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 01ac0649a..2f74b6d5a 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -647,8 +647,8 @@ fn test_array_index() { _ => panic!("Expected an expression with alias"), }; assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Array(Array { + &Expr::CompoundExpr { + root: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_owned())), Expr::Value(Value::SingleQuotedString("b".to_owned())), @@ -656,9 +656,9 @@ fn test_array_index() { ], named: false })), - subscript: Box::new(Subscript::Index { + chain: vec![AccessField::SubScript(Subscript::Index { index: Expr::Value(number("3")) - }) + })] }, expr ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 92368e9ee..ee2006601 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2095,11 +2095,11 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("foo"))), - subscript: Box::new(Subscript::Index { + &Expr::CompoundExpr { + root: Box::new(Expr::Identifier(Ident::new("foo"))), + chain: vec![AccessField::SubScript(Subscript::Index { index: num[0].clone() - }), + })], }, expr_from_projection(only(&select.projection)), ); @@ -2107,16 +2107,15 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0][0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("foo"))), - subscript: Box::new(Subscript::Index { + &Expr::CompoundExpr { + root: Box::new(Expr::Identifier(Ident::new("foo"))), + chain: vec![ + AccessField::SubScript(Subscript::Index { index: num[0].clone() }), - }), - subscript: Box::new(Subscript::Index { + AccessField::SubScript(Subscript::Index { index: num[0].clone() - }), + })], }, expr_from_projection(only(&select.projection)), ); @@ -2124,29 +2123,27 @@ fn parse_array_index_expr() { let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Identifier(Ident::new("bar"))), - subscript: Box::new(Subscript::Index { - index: num[0].clone() - }) + &Expr::CompoundExpr { + root: Box::new(Expr::Identifier(Ident::new("bar"))), + chain: vec![ + AccessField::SubScript(Subscript::Index { + index: num[0].clone() }), - subscript: Box::new(Subscript::Index { + AccessField::SubScript(Subscript::Index { index: Expr::Identifier(Ident { value: "baz".to_string(), quote_style: Some('"'), span: Span::empty(), }) - }) - }), - subscript: Box::new(Subscript::Index { - index: Expr::Identifier(Ident { - value: "fooz".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }) - }) + }), + AccessField::SubScript(Subscript::Index { + index: Expr::Identifier(Ident { + value: "fooz".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }) + }), + ], }, expr_from_projection(only(&select.projection)), ); @@ -2154,9 +2151,8 @@ fn parse_array_index_expr() { let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(Expr::Nested(Box::new(Expr::Cast { + &Expr::CompoundExpr { + root: Box::new(Expr::Nested(Box::new(Expr::Cast { kind: CastKind::Cast, expr: Box::new(Expr::Array(Array { elem: vec![Expr::Array(Array { @@ -2174,13 +2170,14 @@ fn parse_array_index_expr() { )), format: None, }))), - subscript: Box::new(Subscript::Index { + chain: vec![ + AccessField::SubScript(Subscript::Index { index: num[1].clone() }), - }), - subscript: Box::new(Subscript::Index { - index: num[2].clone() - }), + AccessField::SubScript(Subscript::Index { + index: num[2].clone() + }), + ], }, expr_from_projection(only(&select.projection)), ); @@ -2269,9 +2266,12 @@ fn parse_array_subscript() { ), ]; for (sql, expect) in tests { - let Expr::Subscript { subscript, .. } = pg_and_generic().verified_expr(sql) else { + let Expr::CompoundExpr { chain, .. } = pg_and_generic().verified_expr(sql) else { panic!("expected subscript expr"); }; + let Some(AccessField::SubScript(subscript)) = chain.last() else { + panic!("expected subscript"); + }; assert_eq!(expect, *subscript); } @@ -2282,9 +2282,8 @@ fn parse_array_subscript() { fn parse_array_multi_subscript() { let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]"); assert_eq!( - Expr::Subscript { - expr: Box::new(Expr::Subscript { - expr: Box::new(call( + Expr::CompoundExpr { + root: Box::new(call( "make_array", vec![ Expr::Value(number("1")), @@ -2292,15 +2291,16 @@ fn parse_array_multi_subscript() { Expr::Value(number("3")) ] )), - subscript: Box::new(Subscript::Slice { + chain: vec![ + AccessField::SubScript(Subscript::Slice { lower_bound: Some(Expr::Value(number("1"))), upper_bound: Some(Expr::Value(number("2"))), stride: None, }), - }), - subscript: Box::new(Subscript::Index { - index: Expr::Value(number("2")), - }), + AccessField::SubScript(Subscript::Index { + index: Expr::Value(number("2")), + }), + ], }, expr, ); From 035529019bfba7df7ad085b09ba8b0028b549885 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 5 Dec 2024 00:57:39 +0800 Subject: [PATCH 09/30] fix snowflake syntax --- src/dialect/snowflake.rs | 4 +++ src/parser/mod.rs | 75 ++++++++++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 77d2ccff1..0596d7c3c 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -234,6 +234,10 @@ impl Dialect for SnowflakeDialect { RESERVED_FOR_IDENTIFIER.contains(&kw) } } + + fn supports_partiql(&self) -> bool { + true + } } /// Parse snowflake create table statement. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d32dc863b..8de80790c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1148,17 +1148,29 @@ impl<'a> Parser<'a> { Token::Period => self.parse_compound_expr(Expr::Identifier(w.to_ident(w_span)), vec![]), Token::LParen => { let id_parts = vec![w.to_ident(w_span)]; - let mut expr = self.parse_function(ObjectName(id_parts))?; - // consume all period if it's a method chain - if self.dialect.supports_methods() { - expr = self.try_parse_method(expr)? - } - let mut fields = vec![]; - // if the function returns an array, it can be subscripted - if self.consume_token(&Token::LBracket) { - self.parse_multi_dim_subscript(&mut fields)?; + // parse_comma_outer_join is used to parse the following pattern: + if dialect_of!(self is SnowflakeDialect | MsSqlDialect) + && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) + { + Ok(Expr::OuterJoin(Box::new( + match <[Ident; 1]>::try_from(id_parts) { + Ok([ident]) => Expr::Identifier(ident), + Err(parts) => Expr::CompoundIdentifier(parts), + }, + ))) + } else { + let mut expr = self.parse_function(ObjectName(id_parts))?; + // consume all period if it's a method chain + if self.dialect.supports_methods() { + expr = self.try_parse_method(expr)? + } + let mut fields = vec![]; + // if the function returns an array, it can be subscripted + if self.consume_token(&Token::LBracket) { + self.parse_multi_dim_subscript(&mut fields)?; + } + self.parse_compound_expr(expr, fields) } - self.parse_compound_expr(expr, fields) } Token::LBracket => { let _ = self.consume_token(&Token::LBracket); @@ -1420,15 +1432,21 @@ impl<'a> Parser<'a> { mut chain: Vec, ) -> Result { let mut ending_wildcard: Option = None; + let mut ending_lbracket = false; while self.consume_token(&Token::Period) { let next_token = self.next_token(); match next_token.token { Token::Word(w) => { let expr = Expr::Identifier(w.to_ident(next_token.span)); chain.push(AccessField::Expr(expr)); - if self.consume_token(&Token::LBracket) && !self.dialect.supports_partiql() { - self.parse_multi_dim_subscript(&mut chain)? - }; + if self.consume_token(&Token::LBracket) { + if self.dialect.supports_partiql() { + ending_lbracket = true; + break; + } else { + self.parse_multi_dim_subscript(&mut chain)? + } + } } Token::Mul => { // Postgres explicitly allows funcnm(tablenm.*) and the @@ -1450,6 +1468,12 @@ impl<'a> Parser<'a> { } } + // if dialect supports partiql, we need to go back one Token::LBracket for the JsonAccess parsing + if self.dialect.supports_partiql() && ending_lbracket { + self.prev_token(); + } + + if let Some(wildcard_token) = ending_wildcard { let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else { return self.expected("an identifier or a '*' after '.'", self.peek_token()); @@ -3075,11 +3099,9 @@ impl<'a> Parser<'a> { { let mut chain = vec![]; self.parse_multi_dim_subscript(&mut chain)?; - Ok(Expr::CompoundExpr { - root: Box::new(expr), - chain, - }) - } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { + self.parse_compound_expr(expr, chain) + + } else if self.dialect.supports_partiql() { self.prev_token(); self.parse_json_access(expr) } else { @@ -3266,14 +3288,17 @@ impl<'a> Parser<'a> { pub fn parse_map_access(&mut self, expr: Expr) -> Result { let key = self.parse_expr()?; let result = match key { - Expr::Identifier(ident) => Ok(Expr::CompositeAccess { - expr: Box::new(expr), - key: ident, + Expr::Identifier(_) => Ok(Expr::CompoundExpr { + root: Box::new(expr), + chain: vec![AccessField::Expr(key)], }), - Expr::Value(Value::SingleQuotedString(s)) - | Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompositeAccess { - expr: Box::new(expr), - key: Ident::new(s), + Expr::Value(Value::SingleQuotedString(_)) => Ok(Expr::CompoundExpr { + root: Box::new(expr), + chain: vec![AccessField::Expr(key)], + }), + Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompoundExpr { + root: Box::new(expr), + chain: vec![AccessField::Expr(Expr::Identifier(Ident::new(s)))], }), _ => parser_err!("Expected identifier or string literal", self.peek_token()), }; From 1de9b214e6741b26923e0ea61db820e8e3447411 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 5 Dec 2024 00:58:34 +0800 Subject: [PATCH 10/30] limit the access chain supported dialect --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8de80790c..b2610b732 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1172,7 +1172,7 @@ impl<'a> Parser<'a> { self.parse_compound_expr(expr, fields) } } - Token::LBracket => { + Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) => { let _ = self.consume_token(&Token::LBracket); let ident = Expr::Identifier(w.to_ident(w_span)); let mut fields = vec![]; From 2a32b9f553f31cd7001c7ca6e1fc6f2f82ab60dd Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 5 Dec 2024 01:01:26 +0800 Subject: [PATCH 11/30] fmt --- src/parser/mod.rs | 17 ++++++------- tests/sqlparser_postgres.rs | 49 +++++++++++++++++++------------------ 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b2610b732..6f0a1abd3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1172,7 +1172,8 @@ impl<'a> Parser<'a> { self.parse_compound_expr(expr, fields) } } - Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) => { + Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) => + { let _ = self.consume_token(&Token::LBracket); let ident = Expr::Identifier(w.to_ident(w_span)); let mut fields = vec![]; @@ -1440,12 +1441,12 @@ impl<'a> Parser<'a> { let expr = Expr::Identifier(w.to_ident(next_token.span)); chain.push(AccessField::Expr(expr)); if self.consume_token(&Token::LBracket) { - if self.dialect.supports_partiql() { - ending_lbracket = true; - break; - } else { - self.parse_multi_dim_subscript(&mut chain)? - } + if self.dialect.supports_partiql() { + ending_lbracket = true; + break; + } else { + self.parse_multi_dim_subscript(&mut chain)? + } } } Token::Mul => { @@ -1473,7 +1474,6 @@ impl<'a> Parser<'a> { self.prev_token(); } - if let Some(wildcard_token) = ending_wildcard { let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else { return self.expected("an identifier or a '*' after '.'", self.peek_token()); @@ -3100,7 +3100,6 @@ impl<'a> Parser<'a> { let mut chain = vec![]; self.parse_multi_dim_subscript(&mut chain)?; self.parse_compound_expr(expr, chain) - } else if self.dialect.supports_partiql() { self.prev_token(); self.parse_json_access(expr) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ee2006601..247fa9ab3 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2114,8 +2114,9 @@ fn parse_array_index_expr() { index: num[0].clone() }), AccessField::SubScript(Subscript::Index { - index: num[0].clone() - })], + index: num[0].clone() + }) + ], }, expr_from_projection(only(&select.projection)), ); @@ -2153,23 +2154,23 @@ fn parse_array_index_expr() { assert_eq!( &Expr::CompoundExpr { root: Box::new(Expr::Nested(Box::new(Expr::Cast { - kind: CastKind::Cast, - expr: Box::new(Expr::Array(Array { - elem: vec![Expr::Array(Array { - elem: vec![num[2].clone(), num[3].clone(),], - named: true, - })], + kind: CastKind::Cast, + expr: Box::new(Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![num[2].clone(), num[3].clone(),], named: true, - })), - data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Int(None)), - None - ))), + })], + named: true, + })), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Int(None)), None - )), - format: None, - }))), + ))), + None + )), + format: None, + }))), chain: vec![ AccessField::SubScript(Subscript::Index { index: num[1].clone() @@ -2284,13 +2285,13 @@ fn parse_array_multi_subscript() { assert_eq!( Expr::CompoundExpr { root: Box::new(call( - "make_array", - vec![ - Expr::Value(number("1")), - Expr::Value(number("2")), - Expr::Value(number("3")) - ] - )), + "make_array", + vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")) + ] + )), chain: vec![ AccessField::SubScript(Subscript::Slice { lower_bound: Some(Expr::Value(number("1"))), From 495d1b3870a3bcc460b5a71405f1389e57277f6d Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 5 Dec 2024 01:23:52 +0800 Subject: [PATCH 12/30] enhance doc and fix the name --- src/ast/mod.rs | 15 ++++++++++-- src/ast/spans.rs | 2 +- src/parser/mod.rs | 7 ++++-- tests/sqlparser_bigquery.rs | 4 ++-- tests/sqlparser_clickhouse.rs | 4 ++-- tests/sqlparser_common.rs | 44 +++++++++++++++++------------------ tests/sqlparser_duckdb.rs | 2 +- tests/sqlparser_postgres.rs | 22 +++++++++--------- 8 files changed, 57 insertions(+), 43 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1bc2084d1..31af8f6c6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1053,12 +1053,23 @@ impl fmt::Display for Subscript { } } +/// The contents inside the `.` in an access chain. +/// It can be an expression or a subscript. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AccessField { Expr(Expr), - SubScript(Subscript), + Subscript(Subscript), +} + +impl fmt::Display for AccessField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AccessField::Expr(expr) => write!(f, "{}", expr), + AccessField::Subscript(subscript) => write!(f, "{}", subscript), + } + } } /// A lambda function. @@ -1264,7 +1275,7 @@ impl fmt::Display for Expr { for field in chain { match field { AccessField::Expr(expr) => write!(f, ".{}", expr)?, - AccessField::SubScript(subscript) => write!(f, "[{}]", subscript)?, + AccessField::Subscript(subscript) => write!(f, "[{}]", subscript)?, } } Ok(()) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 00fbd47b3..0b59b86fe 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1447,7 +1447,7 @@ impl Spanned for AccessField { fn span(&self) -> Span { match self { AccessField::Expr(ident) => ident.span(), - AccessField::SubScript(subscript) => subscript.span(), + AccessField::Subscript(subscript) => subscript.span(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6f0a1abd3..840090de2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -960,7 +960,6 @@ impl<'a> Parser<'a> { /// Parse tokens until the precedence changes. pub fn parse_subexpr(&mut self, precedence: u8) -> Result { let _guard = self.recursion_counter.try_decrease()?; - debug!("precedence: {}", precedence); debug!("parsing expr"); let mut expr = self.parse_prefix()?; debug!("prefix: {:?}", expr); @@ -1427,6 +1426,10 @@ impl<'a> Parser<'a> { } } + /// Try to parse an [Expr::CompoundExpr] like `a.b.c` or `a.b[1].c`. + /// If all the fields are `Expr::Identifier`s, return an [Expr::CompoundIdentifier] instead. + /// If only the root exists, return the root. + /// If self supports [Dialect::supports_partiql], it will fall back when occurs [Token::LBracket] for JsonAccess parsing. pub fn parse_compound_expr( &mut self, root: Expr, @@ -3221,7 +3224,7 @@ impl<'a> Parser<'a> { /// Parser is right after `[` pub fn parse_subscript(&mut self, chain: &mut Vec) -> Result<(), ParserError> { let subscript = self.parse_subscript_inner()?; - chain.push(AccessField::SubScript(subscript)); + chain.push(AccessField::Subscript(subscript)); Ok(()) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 1ebe95a2b..880e6993a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1975,13 +1975,13 @@ fn parse_map_access_expr() { "users", ))), chain: vec![ - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, }), - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: Expr::Function(Function { name: ObjectName(vec![Ident::with_span( Span::new(Location::of(1, 11), Location::of(1, 22)), diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 9c8dc7d88..560651722 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -50,7 +50,7 @@ fn parse_map_access_expr() { quote_style: None, span: Span::empty(), })), - chain: vec![AccessField::SubScript(Subscript::Index { + chain: vec![AccessField::Subscript(Subscript::Index { index: call( "indexOf", [ @@ -86,7 +86,7 @@ fn parse_map_access_expr() { right: Box::new(BinaryOp { left: Box::new(Expr::CompoundExpr { root: Box::new(Identifier(Ident::new("string_value"))), - chain: vec![AccessField::SubScript(Subscript::Index { + chain: vec![AccessField::Subscript(Subscript::Index { index: call( "indexOf", [ diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a787d9695..7a10d3fff 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10330,13 +10330,13 @@ fn parse_map_access_expr() { "users", ))), chain: vec![ - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, }), - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: Expr::Function(Function { name: ObjectName(vec![Ident::with_span( Span::new(Location::of(1, 11), Location::of(1, 22)), @@ -11137,26 +11137,26 @@ fn test_map_syntax() { }), ); - // check( - // "MAP {'a': 10, 'b': 20}['a']", - // Expr::Subscript { - // expr: Box::new(Expr::Map(Map { - // entries: vec![ - // MapEntry { - // key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), - // value: Box::new(number_expr("10")), - // }, - // MapEntry { - // key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), - // value: Box::new(number_expr("20")), - // }, - // ], - // })), - // subscript: Box::new(Subscript::Index { - // index: Expr::Value(Value::SingleQuotedString("a".to_owned())), - // }), - // }, - // ); + check( + "MAP {'a': 10, 'b': 20}['a']", + Expr::CompoundExpr { + root: Box::new(Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), + value: Box::new(number_expr("10")), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), + value: Box::new(number_expr("20")), + }, + ], + })), + chain: vec![AccessField::Subscript(Subscript::Index { + index: Expr::Value(Value::SingleQuotedString("a".to_owned())), + })], + }, + ); check("MAP {}", Expr::Map(Map { entries: vec![] })); } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 2f74b6d5a..825af376b 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -656,7 +656,7 @@ fn test_array_index() { ], named: false })), - chain: vec![AccessField::SubScript(Subscript::Index { + chain: vec![AccessField::Subscript(Subscript::Index { index: Expr::Value(number("3")) })] }, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 247fa9ab3..b3d5ea3be 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2097,7 +2097,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::CompoundExpr { root: Box::new(Expr::Identifier(Ident::new("foo"))), - chain: vec![AccessField::SubScript(Subscript::Index { + chain: vec![AccessField::Subscript(Subscript::Index { index: num[0].clone() })], }, @@ -2110,10 +2110,10 @@ fn parse_array_index_expr() { &Expr::CompoundExpr { root: Box::new(Expr::Identifier(Ident::new("foo"))), chain: vec![ - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: num[0].clone() }), - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: num[0].clone() }) ], @@ -2127,17 +2127,17 @@ fn parse_array_index_expr() { &Expr::CompoundExpr { root: Box::new(Expr::Identifier(Ident::new("bar"))), chain: vec![ - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: num[0].clone() }), - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: Expr::Identifier(Ident { value: "baz".to_string(), quote_style: Some('"'), span: Span::empty(), }) }), - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: Expr::Identifier(Ident { value: "fooz".to_string(), quote_style: Some('"'), @@ -2172,10 +2172,10 @@ fn parse_array_index_expr() { format: None, }))), chain: vec![ - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: num[1].clone() }), - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: num[2].clone() }), ], @@ -2270,7 +2270,7 @@ fn parse_array_subscript() { let Expr::CompoundExpr { chain, .. } = pg_and_generic().verified_expr(sql) else { panic!("expected subscript expr"); }; - let Some(AccessField::SubScript(subscript)) = chain.last() else { + let Some(AccessField::Subscript(subscript)) = chain.last() else { panic!("expected subscript"); }; assert_eq!(expect, *subscript); @@ -2293,12 +2293,12 @@ fn parse_array_multi_subscript() { ] )), chain: vec![ - AccessField::SubScript(Subscript::Slice { + AccessField::Subscript(Subscript::Slice { lower_bound: Some(Expr::Value(number("1"))), upper_bound: Some(Expr::Value(number("2"))), stride: None, }), - AccessField::SubScript(Subscript::Index { + AccessField::Subscript(Subscript::Index { index: Expr::Value(number("2")), }), ], From e7b55be9cbbcd533399ff0926dba5596117c2be1 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Thu, 5 Dec 2024 01:40:18 +0800 Subject: [PATCH 13/30] fix typo --- src/ast/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 31af8f6c6..bde57c0f8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -590,7 +590,7 @@ pub enum Expr { Identifier(Ident), /// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col` CompoundIdentifier(Vec), - /// Multi-part Expression accessing. It's used to represent a access chain from a root expression. + /// Multi-part Expression accessing. It's used to represent an access chain from a root expression. /// e.g. `expr[0]`, `expr[0][0]`, or `expr.field1.filed2[1].field3`, ... CompoundExpr { root: Box, From 47a5da15ff4b2a185c7a0b16297c6f4386196f80 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 9 Dec 2024 22:56:23 +0800 Subject: [PATCH 14/30] update doc --- src/ast/mod.rs | 17 ++++++++++++++++- src/ast/spans.rs | 28 ++++++++++++++-------------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 487d55832..f8e269372 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -593,7 +593,22 @@ pub enum Expr { /// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col` CompoundIdentifier(Vec), /// Multi-part Expression accessing. It's used to represent an access chain from a root expression. - /// e.g. `expr[0]`, `expr[0][0]`, or `expr.field1.filed2[1].field3`, ... + /// + /// For example: + /// - Array + /// - A 1-dim array a[1] will be represented like: + /// `CompoundExpr(Ident('a'), vec![Subscript(1)]` + /// - A 2-dim array a[1][2] will be represented like: + /// `CompoundExpr(Ident('a'), vec![Subscript(1), Subscript(2)]` + /// - Map or Struct (Bracket-style) + /// - A map a['field1'] will be represented like: + /// `CompoundExpr(Ident('a'), vec![Subscript('field')]` + /// - A 2-dim map a['field1']['field2'] will be represented like: + /// `CompoundExpr(Ident('a'), vec![Subscript('field1'), Subscript('field2')]` + /// - Struct (Dot-style) (only effect when the chain contains both subscript and expr) + /// - A struct access a[field1].field2 will be represented like: + /// `CompoundExpr(Ident('a'), vec![Subscript('field1'), Ident('field2')]` + /// - If a struct access likes a.field1.field2, it will be represented by CompoundIdentifer([a, field1, field2]) CompoundExpr { root: Box, chain: Vec, diff --git a/src/ast/spans.rs b/src/ast/spans.rs index b92bd7842..da7ee8851 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -3,20 +3,20 @@ use core::iter; use crate::tokenizer::Span; use super::{ - AccessField, dcl::SecondaryRoles, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Array, - Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, - ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, - CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, - Expr, ExprWithAlias, Fetch, FromTable, Function, FunctionArg, FunctionArgExpr, - FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, - IlikeSelectItem, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, - JsonPath, JsonPathElem, LateralView, MatchRecognizePattern, Measure, NamedWindowDefinition, - ObjectName, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy, OrderByExpr, Partition, - PivotValueSource, ProjectionSelect, Query, ReferentialAction, RenameSelectItem, - ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, - Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, - TableFactor, TableOptionsClustered, TableWithJoins, Use, Value, Values, ViewColumnDef, - WildcardAdditionalOptions, With, WithFill, + dcl::SecondaryRoles, AccessField, AlterColumnOperation, AlterIndexOperation, + AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, + ColumnDef, ColumnOption, ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics, + CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, + ExceptSelectItem, ExcludeSelectItem, Expr, ExprWithAlias, Fetch, FromTable, Function, + FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, + GroupByExpr, HavingBound, IlikeSelectItem, Insert, Interpolate, InterpolateExpr, Join, + JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, MatchRecognizePattern, + Measure, NamedWindowDefinition, ObjectName, Offset, OnConflict, OnConflictAction, OnInsert, + OrderBy, OrderByExpr, Partition, PivotValueSource, ProjectionSelect, Query, ReferentialAction, + RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, + SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, + TableConstraint, TableFactor, TableOptionsClustered, TableWithJoins, Use, Value, Values, + ViewColumnDef, WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. From b58e50c37ecb9235d7e9a80c50ec80393dd02f15 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 9 Dec 2024 23:02:50 +0800 Subject: [PATCH 15/30] update doc and rename AccessExpr --- src/ast/mod.rs | 20 +++++++++++--------- src/ast/spans.rs | 8 ++++---- src/parser/mod.rs | 22 +++++++++++----------- tests/sqlparser_bigquery.rs | 8 ++++---- tests/sqlparser_clickhouse.rs | 4 ++-- tests/sqlparser_common.rs | 6 +++--- tests/sqlparser_duckdb.rs | 2 +- tests/sqlparser_postgres.rs | 22 +++++++++++----------- 8 files changed, 47 insertions(+), 45 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f8e269372..829324e87 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -611,7 +611,7 @@ pub enum Expr { /// - If a struct access likes a.field1.field2, it will be represented by CompoundIdentifer([a, field1, field2]) CompoundExpr { root: Box, - chain: Vec, + chain: Vec, }, /// Access data nested in a value containing semi-structured data, such as /// the `VARIANT` type on Snowflake. for example `src:customer[0].name`. @@ -1070,21 +1070,23 @@ impl fmt::Display for Subscript { } } -/// The contents inside the `.` in an access chain. +/// An element of a [`Expr::CompoundExpr`]. /// It can be an expression or a subscript. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum AccessField { - Expr(Expr), +pub enum AccessExpr { + /// Accesses a field using dot notation, e.g. `foo.bar.baz`. + Dot(Expr), + /// Accesses a field or array element using bracket notation, e.g. `foo['bar']`. Subscript(Subscript), } -impl fmt::Display for AccessField { +impl fmt::Display for AccessExpr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - AccessField::Expr(expr) => write!(f, "{}", expr), - AccessField::Subscript(subscript) => write!(f, "{}", subscript), + AccessExpr::Dot(expr) => write!(f, "{}", expr), + AccessExpr::Subscript(subscript) => write!(f, "{}", subscript), } } } @@ -1291,8 +1293,8 @@ impl fmt::Display for Expr { write!(f, "{}", root)?; for field in chain { match field { - AccessField::Expr(expr) => write!(f, ".{}", expr)?, - AccessField::Subscript(subscript) => write!(f, "[{}]", subscript)?, + AccessExpr::Dot(expr) => write!(f, ".{}", expr)?, + AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript)?, } } Ok(()) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index da7ee8851..57b83a794 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -3,7 +3,7 @@ use core::iter; use crate::tokenizer::Span; use super::{ - dcl::SecondaryRoles, AccessField, AlterColumnOperation, AlterIndexOperation, + dcl::SecondaryRoles, AccessExpr, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, @@ -1454,11 +1454,11 @@ impl Spanned for Subscript { } } -impl Spanned for AccessField { +impl Spanned for AccessExpr { fn span(&self) -> Span { match self { - AccessField::Expr(ident) => ident.span(), - AccessField::Subscript(subscript) => subscript.span(), + AccessExpr::Dot(ident) => ident.span(), + AccessExpr::Subscript(subscript) => subscript.span(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index beb33bfda..1bb85e176 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1433,7 +1433,7 @@ impl<'a> Parser<'a> { pub fn parse_compound_expr( &mut self, root: Expr, - mut chain: Vec, + mut chain: Vec, ) -> Result { let mut ending_wildcard: Option = None; let mut ending_lbracket = false; @@ -1442,7 +1442,7 @@ impl<'a> Parser<'a> { match next_token.token { Token::Word(w) => { let expr = Expr::Identifier(w.to_ident(next_token.span)); - chain.push(AccessField::Expr(expr)); + chain.push(AccessExpr::Dot(expr)); if self.consume_token(&Token::LBracket) { if self.dialect.supports_partiql() { ending_lbracket = true; @@ -1464,7 +1464,7 @@ impl<'a> Parser<'a> { } Token::SingleQuotedString(s) => { let expr = Expr::Identifier(Ident::with_quote('\'', s)); - chain.push(AccessField::Expr(expr)); + chain.push(AccessExpr::Dot(expr)); } _ => { return self.expected("an identifier or a '*' after '.'", next_token); @@ -1516,14 +1516,14 @@ impl<'a> Parser<'a> { } } - fn exprs_to_idents(root: &Expr, fields: &[AccessField]) -> Option> { + fn exprs_to_idents(root: &Expr, fields: &[AccessExpr]) -> Option> { let mut idents = vec![]; let Expr::Identifier(root) = root else { return None; }; idents.push(root.clone()); for x in fields { - if let AccessField::Expr(Expr::Identifier(ident)) = x { + if let AccessExpr::Dot(Expr::Identifier(ident)) = x { idents.push(ident.clone()) } else { return None; @@ -3208,7 +3208,7 @@ impl<'a> Parser<'a> { /// Parser is right after the first `[` pub fn parse_multi_dim_subscript( &mut self, - chain: &mut Vec, + chain: &mut Vec, ) -> Result<(), ParserError> { loop { self.parse_subscript(chain)?; @@ -3222,9 +3222,9 @@ impl<'a> Parser<'a> { /// Parses an array subscript like `[1:3]` /// /// Parser is right after `[` - pub fn parse_subscript(&mut self, chain: &mut Vec) -> Result<(), ParserError> { + pub fn parse_subscript(&mut self, chain: &mut Vec) -> Result<(), ParserError> { let subscript = self.parse_subscript_inner()?; - chain.push(AccessField::Subscript(subscript)); + chain.push(AccessExpr::Subscript(subscript)); Ok(()) } @@ -3292,15 +3292,15 @@ impl<'a> Parser<'a> { let result = match key { Expr::Identifier(_) => Ok(Expr::CompoundExpr { root: Box::new(expr), - chain: vec![AccessField::Expr(key)], + chain: vec![AccessExpr::Dot(key)], }), Expr::Value(Value::SingleQuotedString(_)) => Ok(Expr::CompoundExpr { root: Box::new(expr), - chain: vec![AccessField::Expr(key)], + chain: vec![AccessExpr::Dot(key)], }), Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompoundExpr { root: Box::new(expr), - chain: vec![AccessField::Expr(Expr::Identifier(Ident::new(s)))], + chain: vec![AccessExpr::Dot(Expr::Identifier(Ident::new(s)))], }), _ => parser_err!("Expected identifier or string literal", self.peek_token()), }; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 880e6993a..8ece6c182 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1975,13 +1975,13 @@ fn parse_map_access_expr() { "users", ))), chain: vec![ - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, }), - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::Function(Function { name: ObjectName(vec![Ident::with_span( Span::new(Location::of(1, 11), Location::of(1, 22)), @@ -2001,11 +2001,11 @@ fn parse_map_access_expr() { within_group: vec![], }), }), - AccessField::Expr(Expr::Identifier(Ident::with_span( + AccessExpr::Dot(Expr::Identifier(Ident::with_span( Span::new(Location::of(1, 24), Location::of(1, 25)), "a", ))), - AccessField::Expr(Expr::Identifier(Ident::with_span( + AccessExpr::Dot(Expr::Identifier(Ident::with_span( Span::new(Location::of(1, 26), Location::of(1, 27)), "b", ))), diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 560651722..6d30ea1e5 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -50,7 +50,7 @@ fn parse_map_access_expr() { quote_style: None, span: Span::empty(), })), - chain: vec![AccessField::Subscript(Subscript::Index { + chain: vec![AccessExpr::Subscript(Subscript::Index { index: call( "indexOf", [ @@ -86,7 +86,7 @@ fn parse_map_access_expr() { right: Box::new(BinaryOp { left: Box::new(Expr::CompoundExpr { root: Box::new(Identifier(Ident::new("string_value"))), - chain: vec![AccessField::Subscript(Subscript::Index { + chain: vec![AccessExpr::Subscript(Subscript::Index { index: call( "indexOf", [ diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e6b1f3e96..e3da7b70f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10331,13 +10331,13 @@ fn parse_map_access_expr() { "users", ))), chain: vec![ - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::UnaryOp { op: UnaryOperator::Minus, expr: Expr::Value(number("1")).into(), }, }), - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::Function(Function { name: ObjectName(vec![Ident::with_span( Span::new(Location::of(1, 11), Location::of(1, 22)), @@ -11153,7 +11153,7 @@ fn test_map_syntax() { }, ], })), - chain: vec![AccessField::Subscript(Subscript::Index { + chain: vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(Value::SingleQuotedString("a".to_owned())), })], }, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 825af376b..000dae0dd 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -656,7 +656,7 @@ fn test_array_index() { ], named: false })), - chain: vec![AccessField::Subscript(Subscript::Index { + chain: vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(number("3")) })] }, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index b3d5ea3be..584d2850d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2097,7 +2097,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::CompoundExpr { root: Box::new(Expr::Identifier(Ident::new("foo"))), - chain: vec![AccessField::Subscript(Subscript::Index { + chain: vec![AccessExpr::Subscript(Subscript::Index { index: num[0].clone() })], }, @@ -2110,10 +2110,10 @@ fn parse_array_index_expr() { &Expr::CompoundExpr { root: Box::new(Expr::Identifier(Ident::new("foo"))), chain: vec![ - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: num[0].clone() }), - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: num[0].clone() }) ], @@ -2127,17 +2127,17 @@ fn parse_array_index_expr() { &Expr::CompoundExpr { root: Box::new(Expr::Identifier(Ident::new("bar"))), chain: vec![ - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: num[0].clone() }), - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::Identifier(Ident { value: "baz".to_string(), quote_style: Some('"'), span: Span::empty(), }) }), - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::Identifier(Ident { value: "fooz".to_string(), quote_style: Some('"'), @@ -2172,10 +2172,10 @@ fn parse_array_index_expr() { format: None, }))), chain: vec![ - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: num[1].clone() }), - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: num[2].clone() }), ], @@ -2270,7 +2270,7 @@ fn parse_array_subscript() { let Expr::CompoundExpr { chain, .. } = pg_and_generic().verified_expr(sql) else { panic!("expected subscript expr"); }; - let Some(AccessField::Subscript(subscript)) = chain.last() else { + let Some(AccessExpr::Subscript(subscript)) = chain.last() else { panic!("expected subscript"); }; assert_eq!(expect, *subscript); @@ -2293,12 +2293,12 @@ fn parse_array_multi_subscript() { ] )), chain: vec![ - AccessField::Subscript(Subscript::Slice { + AccessExpr::Subscript(Subscript::Slice { lower_bound: Some(Expr::Value(number("1"))), upper_bound: Some(Expr::Value(number("2"))), stride: None, }), - AccessField::Subscript(Subscript::Index { + AccessExpr::Subscript(Subscript::Index { index: Expr::Value(number("2")), }), ], From 7cb2e003bb85ab2d3202d24eb840456044baea2d Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 9 Dec 2024 23:09:17 +0800 Subject: [PATCH 16/30] remove unused crate --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 6d17bbd3e..5d72f9f0e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -138,7 +138,6 @@ extern crate alloc; #[macro_use] #[cfg(test)] extern crate pretty_assertions; -extern crate core; pub mod ast; #[macro_use] From 397335acf5ba5a384a87de5cbe5e302df93bbb25 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 9 Dec 2024 23:36:58 +0800 Subject: [PATCH 17/30] update the out date doc --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1bb85e176..3b15b5ebd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1147,7 +1147,7 @@ impl<'a> Parser<'a> { Token::Period => self.parse_compound_expr(Expr::Identifier(w.to_ident(w_span)), vec![]), Token::LParen => { let id_parts = vec![w.to_ident(w_span)]; - // parse_comma_outer_join is used to parse the following pattern: + // parse `(+)` outer join syntax if dialect_of!(self is SnowflakeDialect | MsSqlDialect) && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) { From ac25e5d2f1c47279757841036b28ee5087878e15 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 9 Dec 2024 23:42:35 +0800 Subject: [PATCH 18/30] remove unused parsing --- src/parser/mod.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3b15b5ebd..093f227c0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1163,11 +1163,7 @@ impl<'a> Parser<'a> { if self.dialect.supports_methods() { expr = self.try_parse_method(expr)? } - let mut fields = vec![]; - // if the function returns an array, it can be subscripted - if self.consume_token(&Token::LBracket) { - self.parse_multi_dim_subscript(&mut fields)?; - } + let fields = vec![]; self.parse_compound_expr(expr, fields) } } From a08e5c2c0101b2d30e436fe070f60503bd19bfc6 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 10 Dec 2024 00:02:13 +0800 Subject: [PATCH 19/30] rename to `CompoundFieldAccess` --- src/ast/mod.rs | 25 +++++++++++++------------ src/ast/spans.rs | 2 +- src/parser/mod.rs | 10 +++++----- tests/sqlparser_bigquery.rs | 2 +- tests/sqlparser_clickhouse.rs | 4 ++-- tests/sqlparser_common.rs | 4 ++-- tests/sqlparser_duckdb.rs | 2 +- tests/sqlparser_postgres.rs | 12 ++++++------ 8 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 829324e87..f92b63c7b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -592,26 +592,27 @@ pub enum Expr { Identifier(Ident), /// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col` CompoundIdentifier(Vec), - /// Multi-part Expression accessing. It's used to represent an access chain from a root expression. + /// Multi-part expression access. /// - /// For example: + /// This structure represents an access chain in structured / nested types + /// such as maps, arrays, and lists: /// - Array /// - A 1-dim array a[1] will be represented like: - /// `CompoundExpr(Ident('a'), vec![Subscript(1)]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]` /// - A 2-dim array a[1][2] will be represented like: - /// `CompoundExpr(Ident('a'), vec![Subscript(1), Subscript(2)]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]` /// - Map or Struct (Bracket-style) /// - A map a['field1'] will be represented like: - /// `CompoundExpr(Ident('a'), vec![Subscript('field')]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]` /// - A 2-dim map a['field1']['field2'] will be represented like: - /// `CompoundExpr(Ident('a'), vec![Subscript('field1'), Subscript('field2')]` + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]` /// - Struct (Dot-style) (only effect when the chain contains both subscript and expr) /// - A struct access a[field1].field2 will be represented like: - /// `CompoundExpr(Ident('a'), vec![Subscript('field1'), Ident('field2')]` - /// - If a struct access likes a.field1.field2, it will be represented by CompoundIdentifer([a, field1, field2]) - CompoundExpr { + /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]` + /// - If a struct access likes a.field1.field2, it will be represented by CompoundIdentifier([a, field1, field2]) + CompoundFieldAccess { root: Box, - chain: Vec, + access_chain: Vec, }, /// Access data nested in a value containing semi-structured data, such as /// the `VARIANT` type on Snowflake. for example `src:customer[0].name`. @@ -1070,7 +1071,7 @@ impl fmt::Display for Subscript { } } -/// An element of a [`Expr::CompoundExpr`]. +/// An element of a [`Expr::CompoundFieldAccess`]. /// It can be an expression or a subscript. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1289,7 +1290,7 @@ impl fmt::Display for Expr { Expr::Wildcard(_) => f.write_str("*"), Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), - Expr::CompoundExpr { root, chain } => { + Expr::CompoundFieldAccess { root, chain } => { write!(f, "{}", root)?; for field in chain { match field { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 57b83a794..83ce2a267 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1244,7 +1244,7 @@ impl Spanned for Expr { Expr::Identifier(ident) => ident.span, Expr::CompoundIdentifier(vec) => union_spans(vec.iter().map(|i| i.span)), Expr::CompositeAccess { expr, key } => expr.span().union(&key.span), - Expr::CompoundExpr { root, chain } => { + Expr::CompoundFieldAccess { root, chain } => { union_spans(iter::once(root.span()).chain(chain.iter().map(|i| i.span()))) } Expr::IsFalse(expr) => expr.span(), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 093f227c0..2173e16ce 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1422,7 +1422,7 @@ impl<'a> Parser<'a> { } } - /// Try to parse an [Expr::CompoundExpr] like `a.b.c` or `a.b[1].c`. + /// Try to parse an [Expr::CompoundFieldAccess] like `a.b.c` or `a.b[1].c`. /// If all the fields are `Expr::Identifier`s, return an [Expr::CompoundIdentifier] instead. /// If only the root exists, return the root. /// If self supports [Dialect::supports_partiql], it will fall back when occurs [Token::LBracket] for JsonAccess parsing. @@ -1505,7 +1505,7 @@ impl<'a> Parser<'a> { if chain.is_empty() { return Ok(root); } - Ok(Expr::CompoundExpr { + Ok(Expr::CompoundFieldAccess { root: Box::new(root), chain: chain.clone(), }) @@ -3286,15 +3286,15 @@ impl<'a> Parser<'a> { pub fn parse_map_access(&mut self, expr: Expr) -> Result { let key = self.parse_expr()?; let result = match key { - Expr::Identifier(_) => Ok(Expr::CompoundExpr { + Expr::Identifier(_) => Ok(Expr::CompoundFieldAccess { root: Box::new(expr), chain: vec![AccessExpr::Dot(key)], }), - Expr::Value(Value::SingleQuotedString(_)) => Ok(Expr::CompoundExpr { + Expr::Value(Value::SingleQuotedString(_)) => Ok(Expr::CompoundFieldAccess { root: Box::new(expr), chain: vec![AccessExpr::Dot(key)], }), - Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompoundExpr { + Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompoundFieldAccess { root: Box::new(expr), chain: vec![AccessExpr::Dot(Expr::Identifier(Ident::new(s)))], }), diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 8ece6c182..84001c74b 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1969,7 +1969,7 @@ fn parse_map_access_expr() { let sql = "users[-1][safe_offset(2)].a.b"; let expr = bigquery().verified_expr(sql); - let expected = Expr::CompoundExpr { + let expected = Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::with_span( Span::new(Location::of(1, 1), Location::of(1, 6)), "users", diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 6d30ea1e5..94ec59fa4 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -44,7 +44,7 @@ fn parse_map_access_expr() { select_token: AttachedToken::empty(), top: None, top_before_distinct: false, - projection: vec![UnnamedExpr(Expr::CompoundExpr { + projection: vec![UnnamedExpr(Expr::CompoundFieldAccess { root: Box::new(Identifier(Ident { value: "string_values".to_string(), quote_style: None, @@ -84,7 +84,7 @@ fn parse_map_access_expr() { }), op: BinaryOperator::And, right: Box::new(BinaryOp { - left: Box::new(Expr::CompoundExpr { + left: Box::new(Expr::CompoundFieldAccess { root: Box::new(Identifier(Ident::new("string_value"))), chain: vec![AccessExpr::Subscript(Subscript::Index { index: call( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e3da7b70f..e55892338 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10325,7 +10325,7 @@ fn parse_map_access_expr() { Box::new(ClickHouseDialect {}), ]); let expr = dialects.verified_expr(sql); - let expected = Expr::CompoundExpr { + let expected = Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::with_span( Span::new(Location::of(1, 1), Location::of(1, 6)), "users", @@ -11140,7 +11140,7 @@ fn test_map_syntax() { check( "MAP {'a': 10, 'b': 20}['a']", - Expr::CompoundExpr { + Expr::CompoundFieldAccess { root: Box::new(Expr::Map(Map { entries: vec![ MapEntry { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 000dae0dd..ff0dd88a9 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -647,7 +647,7 @@ fn test_array_index() { _ => panic!("Expected an expression with alias"), }; assert_eq!( - &Expr::CompoundExpr { + &Expr::CompoundFieldAccess { root: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_owned())), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 584d2850d..fd353d2f7 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2095,7 +2095,7 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::CompoundExpr { + &Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::new("foo"))), chain: vec![AccessExpr::Subscript(Subscript::Index { index: num[0].clone() @@ -2107,7 +2107,7 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0][0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::CompoundExpr { + &Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::new("foo"))), chain: vec![ AccessExpr::Subscript(Subscript::Index { @@ -2124,7 +2124,7 @@ fn parse_array_index_expr() { let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::CompoundExpr { + &Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::new("bar"))), chain: vec![ AccessExpr::Subscript(Subscript::Index { @@ -2152,7 +2152,7 @@ fn parse_array_index_expr() { let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::CompoundExpr { + &Expr::CompoundFieldAccess { root: Box::new(Expr::Nested(Box::new(Expr::Cast { kind: CastKind::Cast, expr: Box::new(Expr::Array(Array { @@ -2267,7 +2267,7 @@ fn parse_array_subscript() { ), ]; for (sql, expect) in tests { - let Expr::CompoundExpr { chain, .. } = pg_and_generic().verified_expr(sql) else { + let Expr::CompoundFieldAccess { chain, .. } = pg_and_generic().verified_expr(sql) else { panic!("expected subscript expr"); }; let Some(AccessExpr::Subscript(subscript)) = chain.last() else { @@ -2283,7 +2283,7 @@ fn parse_array_subscript() { fn parse_array_multi_subscript() { let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]"); assert_eq!( - Expr::CompoundExpr { + Expr::CompoundFieldAccess { root: Box::new(call( "make_array", vec![ From 09b39eb74fc95f4005b5f51ec8af0f7a4604fcde Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 10 Dec 2024 00:12:18 +0800 Subject: [PATCH 20/30] rename chain and display AccessExpr by itself --- src/ast/mod.rs | 13 +++++-------- src/ast/spans.rs | 4 ++-- src/parser/mod.rs | 8 ++++---- tests/sqlparser_bigquery.rs | 2 +- tests/sqlparser_clickhouse.rs | 4 ++-- tests/sqlparser_common.rs | 4 ++-- tests/sqlparser_duckdb.rs | 2 +- tests/sqlparser_postgres.rs | 14 +++++++------- 8 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f92b63c7b..6586005f8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1086,8 +1086,8 @@ pub enum AccessExpr { impl fmt::Display for AccessExpr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - AccessExpr::Dot(expr) => write!(f, "{}", expr), - AccessExpr::Subscript(subscript) => write!(f, "{}", subscript), + AccessExpr::Dot(expr) => write!(f, ".{}", expr), + AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript), } } } @@ -1290,13 +1290,10 @@ impl fmt::Display for Expr { Expr::Wildcard(_) => f.write_str("*"), Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), - Expr::CompoundFieldAccess { root, chain } => { + Expr::CompoundFieldAccess { root, access_chain } => { write!(f, "{}", root)?; - for field in chain { - match field { - AccessExpr::Dot(expr) => write!(f, ".{}", expr)?, - AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript)?, - } + for field in access_chain { + write!(f, "{}", field)?; } Ok(()) } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 83ce2a267..184628ccd 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1244,8 +1244,8 @@ impl Spanned for Expr { Expr::Identifier(ident) => ident.span, Expr::CompoundIdentifier(vec) => union_spans(vec.iter().map(|i| i.span)), Expr::CompositeAccess { expr, key } => expr.span().union(&key.span), - Expr::CompoundFieldAccess { root, chain } => { - union_spans(iter::once(root.span()).chain(chain.iter().map(|i| i.span()))) + Expr::CompoundFieldAccess { root, access_chain } => { + union_spans(iter::once(root.span()).chain(access_chain.iter().map(|i| i.span()))) } Expr::IsFalse(expr) => expr.span(), Expr::IsNotFalse(expr) => expr.span(), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2173e16ce..963bfb10a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1507,7 +1507,7 @@ impl<'a> Parser<'a> { } Ok(Expr::CompoundFieldAccess { root: Box::new(root), - chain: chain.clone(), + access_chain: chain.clone(), }) } } @@ -3288,15 +3288,15 @@ impl<'a> Parser<'a> { let result = match key { Expr::Identifier(_) => Ok(Expr::CompoundFieldAccess { root: Box::new(expr), - chain: vec![AccessExpr::Dot(key)], + access_chain: vec![AccessExpr::Dot(key)], }), Expr::Value(Value::SingleQuotedString(_)) => Ok(Expr::CompoundFieldAccess { root: Box::new(expr), - chain: vec![AccessExpr::Dot(key)], + access_chain: vec![AccessExpr::Dot(key)], }), Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompoundFieldAccess { root: Box::new(expr), - chain: vec![AccessExpr::Dot(Expr::Identifier(Ident::new(s)))], + access_chain: vec![AccessExpr::Dot(Expr::Identifier(Ident::new(s)))], }), _ => parser_err!("Expected identifier or string literal", self.peek_token()), }; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 84001c74b..c59e68a18 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1974,7 +1974,7 @@ fn parse_map_access_expr() { Span::new(Location::of(1, 1), Location::of(1, 6)), "users", ))), - chain: vec![ + access_chain: vec![ AccessExpr::Subscript(Subscript::Index { index: Expr::UnaryOp { op: UnaryOperator::Minus, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 94ec59fa4..6ae9c896b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -50,7 +50,7 @@ fn parse_map_access_expr() { quote_style: None, span: Span::empty(), })), - chain: vec![AccessExpr::Subscript(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: call( "indexOf", [ @@ -86,7 +86,7 @@ fn parse_map_access_expr() { right: Box::new(BinaryOp { left: Box::new(Expr::CompoundFieldAccess { root: Box::new(Identifier(Ident::new("string_value"))), - chain: vec![AccessExpr::Subscript(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: call( "indexOf", [ diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e55892338..c8428494a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10330,7 +10330,7 @@ fn parse_map_access_expr() { Span::new(Location::of(1, 1), Location::of(1, 6)), "users", ))), - chain: vec![ + access_chain: vec![ AccessExpr::Subscript(Subscript::Index { index: Expr::UnaryOp { op: UnaryOperator::Minus, @@ -11153,7 +11153,7 @@ fn test_map_syntax() { }, ], })), - chain: vec![AccessExpr::Subscript(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(Value::SingleQuotedString("a".to_owned())), })], }, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index ff0dd88a9..38ce0b434 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -656,7 +656,7 @@ fn test_array_index() { ], named: false })), - chain: vec![AccessExpr::Subscript(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: Expr::Value(number("3")) })] }, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fd353d2f7..9a455684a 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2097,7 +2097,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::new("foo"))), - chain: vec![AccessExpr::Subscript(Subscript::Index { + access_chain: vec![AccessExpr::Subscript(Subscript::Index { index: num[0].clone() })], }, @@ -2109,7 +2109,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::new("foo"))), - chain: vec![ + access_chain: vec![ AccessExpr::Subscript(Subscript::Index { index: num[0].clone() }), @@ -2126,7 +2126,7 @@ fn parse_array_index_expr() { assert_eq!( &Expr::CompoundFieldAccess { root: Box::new(Expr::Identifier(Ident::new("bar"))), - chain: vec![ + access_chain: vec![ AccessExpr::Subscript(Subscript::Index { index: num[0].clone() }), @@ -2171,7 +2171,7 @@ fn parse_array_index_expr() { )), format: None, }))), - chain: vec![ + access_chain: vec![ AccessExpr::Subscript(Subscript::Index { index: num[1].clone() }), @@ -2267,10 +2267,10 @@ fn parse_array_subscript() { ), ]; for (sql, expect) in tests { - let Expr::CompoundFieldAccess { chain, .. } = pg_and_generic().verified_expr(sql) else { + let Expr::CompoundFieldAccess { access_chain, .. } = pg_and_generic().verified_expr(sql) else { panic!("expected subscript expr"); }; - let Some(AccessExpr::Subscript(subscript)) = chain.last() else { + let Some(AccessExpr::Subscript(subscript)) = access_chain.last() else { panic!("expected subscript"); }; assert_eq!(expect, *subscript); @@ -2292,7 +2292,7 @@ fn parse_array_multi_subscript() { Expr::Value(number("3")) ] )), - chain: vec![ + access_chain: vec![ AccessExpr::Subscript(Subscript::Slice { lower_bound: Some(Expr::Value(number("1"))), upper_bound: Some(Expr::Value(number("2"))), From 8968fcc29da76f16f52856b64b61a7ccdc1ceadc Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 10 Dec 2024 00:16:18 +0800 Subject: [PATCH 21/30] rename `parse_compound_expr` --- src/parser/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 963bfb10a..c1c6d84bc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1144,7 +1144,7 @@ impl<'a> Parser<'a> { w_span: Span, ) -> Result { match self.peek_token().token { - Token::Period => self.parse_compound_expr(Expr::Identifier(w.to_ident(w_span)), vec![]), + Token::Period => self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![]), Token::LParen => { let id_parts = vec![w.to_ident(w_span)]; // parse `(+)` outer join syntax @@ -1164,7 +1164,7 @@ impl<'a> Parser<'a> { expr = self.try_parse_method(expr)? } let fields = vec![]; - self.parse_compound_expr(expr, fields) + self.parse_compound_field_access(expr, fields) } } Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) => @@ -1173,7 +1173,7 @@ impl<'a> Parser<'a> { let ident = Expr::Identifier(w.to_ident(w_span)); let mut fields = vec![]; self.parse_multi_dim_subscript(&mut fields)?; - self.parse_compound_expr(ident, fields) + self.parse_compound_field_access(ident, fields) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html Token::SingleQuotedString(_) @@ -1426,7 +1426,7 @@ impl<'a> Parser<'a> { /// If all the fields are `Expr::Identifier`s, return an [Expr::CompoundIdentifier] instead. /// If only the root exists, return the root. /// If self supports [Dialect::supports_partiql], it will fall back when occurs [Token::LBracket] for JsonAccess parsing. - pub fn parse_compound_expr( + pub fn parse_compound_field_access( &mut self, root: Expr, mut chain: Vec, @@ -3098,7 +3098,7 @@ impl<'a> Parser<'a> { { let mut chain = vec![]; self.parse_multi_dim_subscript(&mut chain)?; - self.parse_compound_expr(expr, chain) + self.parse_compound_field_access(expr, chain) } else if self.dialect.supports_partiql() { self.prev_token(); self.parse_json_access(expr) From 1328274af69efff91b6ac5fa839635f8748b16d5 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 10 Dec 2024 00:55:13 +0800 Subject: [PATCH 22/30] fmt and clippy --- src/parser/mod.rs | 4 +++- tests/sqlparser_postgres.rs | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c1c6d84bc..b8f99a81e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1144,7 +1144,9 @@ impl<'a> Parser<'a> { w_span: Span, ) -> Result { match self.peek_token().token { - Token::Period => self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![]), + Token::Period => { + self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![]) + } Token::LParen => { let id_parts = vec![w.to_ident(w_span)]; // parse `(+)` outer join syntax diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9a455684a..d16bbee62 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2267,7 +2267,8 @@ fn parse_array_subscript() { ), ]; for (sql, expect) in tests { - let Expr::CompoundFieldAccess { access_chain, .. } = pg_and_generic().verified_expr(sql) else { + let Expr::CompoundFieldAccess { access_chain, .. } = pg_and_generic().verified_expr(sql) + else { panic!("expected subscript expr"); }; let Some(AccessExpr::Subscript(subscript)) = access_chain.last() else { From d6743e924c99cebaa1f065eb721ae085f9f0116f Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 10 Dec 2024 00:57:04 +0800 Subject: [PATCH 23/30] fix doc --- src/ast/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6586005f8..b55cd1820 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -597,19 +597,19 @@ pub enum Expr { /// This structure represents an access chain in structured / nested types /// such as maps, arrays, and lists: /// - Array - /// - A 1-dim array a[1] will be represented like: + /// - A 1-dim array `a[1]` will be represented like: /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)]` - /// - A 2-dim array a[1][2] will be represented like: + /// - A 2-dim array `a[1][2]` will be represented like: /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1), Subscript(2)]` /// - Map or Struct (Bracket-style) - /// - A map a['field1'] will be represented like: + /// - A map `a['field1']` will be represented like: /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field')]` - /// - A 2-dim map a['field1']['field2'] will be represented like: + /// - A 2-dim map `a['field1']['field2']` will be represented like: /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Subscript('field2')]` /// - Struct (Dot-style) (only effect when the chain contains both subscript and expr) - /// - A struct access a[field1].field2 will be represented like: + /// - A struct access `a[field1].field2` will be represented like: /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]` - /// - If a struct access likes a.field1.field2, it will be represented by CompoundIdentifier([a, field1, field2]) + /// - If a struct access likes `a.field1.field2`, it will be represented by CompoundIdentifier([a, field1, field2]) CompoundFieldAccess { root: Box, access_chain: Vec, From 7d030c1ea79d675ed3add2f7a5e8d1eec4719b96 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 16 Dec 2024 16:09:18 +0800 Subject: [PATCH 24/30] remove unnecessary check --- src/parser/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b8f99a81e..f560b689a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1162,9 +1162,7 @@ impl<'a> Parser<'a> { } else { let mut expr = self.parse_function(ObjectName(id_parts))?; // consume all period if it's a method chain - if self.dialect.supports_methods() { - expr = self.try_parse_method(expr)? - } + expr = self.try_parse_method(expr)?; let fields = vec![]; self.parse_compound_field_access(expr, fields) } From 90e03ebcf06a19e303f7d85d058a900342b0220b Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 16 Dec 2024 16:21:07 +0800 Subject: [PATCH 25/30] improve the doc --- src/parser/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f560b689a..8ae29dbc4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1512,6 +1512,8 @@ impl<'a> Parser<'a> { } } + /// Try to transform the root and fields into a list of [Ident]s. + /// If any element (root and fields) is not an [Expr::Identifier], return None. fn exprs_to_idents(root: &Expr, fields: &[AccessExpr]) -> Option> { let mut idents = vec![]; let Expr::Identifier(root) = root else { @@ -3199,7 +3201,7 @@ impl<'a> Parser<'a> { }) } - /// Parse an multi-dimension array accessing like `[1:3][1][1]` + /// Parse a multi-dimension array accessing like `[1:3][1][1]` /// /// Parser is right after the first `[` pub fn parse_multi_dim_subscript( From 5c54d1b26cee61683ab4d382090c80c4ac84a6b3 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 16 Dec 2024 16:28:29 +0800 Subject: [PATCH 26/30] remove the unused method `parse_map_access` --- src/parser/mod.rs | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8ae29dbc4..46e64646f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3105,7 +3105,7 @@ impl<'a> Parser<'a> { self.prev_token(); self.parse_json_access(expr) } else { - self.parse_map_access(expr) + parser_err!("Array subscripting is not supported", tok.span.start) } } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { self.prev_token(); @@ -3285,27 +3285,6 @@ impl<'a> Parser<'a> { Ok(JsonPath { path }) } - pub fn parse_map_access(&mut self, expr: Expr) -> Result { - let key = self.parse_expr()?; - let result = match key { - Expr::Identifier(_) => Ok(Expr::CompoundFieldAccess { - root: Box::new(expr), - access_chain: vec![AccessExpr::Dot(key)], - }), - Expr::Value(Value::SingleQuotedString(_)) => Ok(Expr::CompoundFieldAccess { - root: Box::new(expr), - access_chain: vec![AccessExpr::Dot(key)], - }), - Expr::Value(Value::DoubleQuotedString(s)) => Ok(Expr::CompoundFieldAccess { - root: Box::new(expr), - access_chain: vec![AccessExpr::Dot(Expr::Identifier(Ident::new(s)))], - }), - _ => parser_err!("Expected identifier or string literal", self.peek_token()), - }; - self.expect_token(&Token::RBracket)?; - result - } - /// Parses the parens following the `[ NOT ] IN` operator. pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { // BigQuery allows `IN UNNEST(array_expression)` From 57830e2604a4db342717d07bca12edebf8eb67d3 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 16 Dec 2024 16:50:30 +0800 Subject: [PATCH 27/30] avoid the unnecessary cloning --- src/parser/mod.rs | 49 ++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 46e64646f..7d59e5d38 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1474,17 +1474,18 @@ impl<'a> Parser<'a> { } if let Some(wildcard_token) = ending_wildcard { - let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else { + if !Self::is_all_ident(&root, &chain) { return self.expected("an identifier or a '*' after '.'", self.peek_token()); }; Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), + ObjectName(Self::exprs_to_idents(root, chain)?), AttachedToken(wildcard_token), )) } else if self.consume_token(&Token::LParen) { - let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else { + if !Self::is_all_ident(&root, &chain) { return self.expected("an identifier or a '*' after '.'", self.peek_token()); }; + let id_parts = Self::exprs_to_idents(root, chain)?; if dialect_of!(self is SnowflakeDialect | MsSqlDialect) && self.consume_tokens(&[Token::Plus, Token::RParen]) { @@ -1499,8 +1500,10 @@ impl<'a> Parser<'a> { self.parse_function(ObjectName(id_parts)) } } else { - if let Some(id_parts) = Self::exprs_to_idents(&root, &chain) { - return Ok(Expr::CompoundIdentifier(id_parts)); + if Self::is_all_ident(&root, &chain) { + return Ok(Expr::CompoundIdentifier(Self::exprs_to_idents( + root, chain, + )?)); } if chain.is_empty() { return Ok(root); @@ -1512,22 +1515,32 @@ impl<'a> Parser<'a> { } } - /// Try to transform the root and fields into a list of [Ident]s. - /// If any element (root and fields) is not an [Expr::Identifier], return None. - fn exprs_to_idents(root: &Expr, fields: &[AccessExpr]) -> Option> { + /// Check if the root is an identifier and all fields are identifiers. + fn is_all_ident(root: &Expr, fields: &[AccessExpr]) -> bool { + if !matches!(root, Expr::Identifier(_)) { + return false; + } + fields + .iter() + .all(|x| matches!(x, AccessExpr::Dot(Expr::Identifier(_)))) + } + + /// Convert a root and a list of fields to a list of identifiers. + fn exprs_to_idents(root: Expr, fields: Vec) -> Result, ParserError> { let mut idents = vec![]; - let Expr::Identifier(root) = root else { - return None; - }; - idents.push(root.clone()); - for x in fields { - if let AccessExpr::Dot(Expr::Identifier(ident)) = x { - idents.push(ident.clone()) - } else { - return None; + if let Expr::Identifier(root) = root { + idents.push(root); + for x in fields { + if let AccessExpr::Dot(Expr::Identifier(ident)) = x { + idents.push(ident); + } else { + return parser_err!(format!("Expected identifier, found: {}", x), x.span().start); + } } + Ok(idents) + } else { + parser_err!(format!("Expected identifier, found: {}", root), root.span().start) } - Some(idents) } pub fn parse_utility_options(&mut self) -> Result, ParserError> { From 4b3818cfc10cc2a15b985e50a0579b932a9220f8 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 16 Dec 2024 17:21:34 +0800 Subject: [PATCH 28/30] extract parse outer_join_expr --- src/parser/mod.rs | 54 +++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7d59e5d38..385be67ed 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1149,16 +1149,8 @@ impl<'a> Parser<'a> { } Token::LParen => { let id_parts = vec![w.to_ident(w_span)]; - // parse `(+)` outer join syntax - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) + if let Some(expr) = self.parse_outer_join_expr(&id_parts) { + Ok(expr) } else { let mut expr = self.parse_function(ObjectName(id_parts))?; // consume all period if it's a method chain @@ -1481,22 +1473,16 @@ impl<'a> Parser<'a> { ObjectName(Self::exprs_to_idents(root, chain)?), AttachedToken(wildcard_token), )) - } else if self.consume_token(&Token::LParen) { + } else if matches!(self.peek_token().token, Token::LParen) { if !Self::is_all_ident(&root, &chain) { + // consume LParen + self.next_token(); return self.expected("an identifier or a '*' after '.'", self.peek_token()); }; let id_parts = Self::exprs_to_idents(root, chain)?; - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) + if let Some(expr) = self.parse_outer_join_expr(&id_parts) { + Ok(expr) } else { - self.prev_token(); self.parse_function(ObjectName(id_parts)) } } else { @@ -1534,12 +1520,34 @@ impl<'a> Parser<'a> { if let AccessExpr::Dot(Expr::Identifier(ident)) = x { idents.push(ident); } else { - return parser_err!(format!("Expected identifier, found: {}", x), x.span().start); + return parser_err!( + format!("Expected identifier, found: {}", x), + x.span().start + ); } } Ok(idents) } else { - parser_err!(format!("Expected identifier, found: {}", root), root.span().start) + parser_err!( + format!("Expected identifier, found: {}", root), + root.span().start + ) + } + } + + /// Try to parse OuterJoin expression `(+)` + fn parse_outer_join_expr(&mut self, id_parts: &[Ident]) -> Option { + if dialect_of!(self is SnowflakeDialect | MsSqlDialect) + && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) + { + Some(Expr::OuterJoin(Box::new( + match <[Ident; 1]>::try_from(id_parts.to_vec()) { + Ok([ident]) => Expr::Identifier(ident), + Err(parts) => Expr::CompoundIdentifier(parts), + }, + ))) + } else { + None } } From 67cd8778224439aa2f1ddd6605fe15ed0cf12279 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 16 Dec 2024 17:40:32 +0800 Subject: [PATCH 29/30] consume LBarcket by `parse_multi_dim_subscript` --- src/parser/mod.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 385be67ed..6f31b07a5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1161,7 +1161,6 @@ impl<'a> Parser<'a> { } Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) => { - let _ = self.consume_token(&Token::LBracket); let ident = Expr::Identifier(w.to_ident(w_span)); let mut fields = vec![]; self.parse_multi_dim_subscript(&mut fields)?; @@ -1431,8 +1430,9 @@ impl<'a> Parser<'a> { Token::Word(w) => { let expr = Expr::Identifier(w.to_ident(next_token.span)); chain.push(AccessExpr::Dot(expr)); - if self.consume_token(&Token::LBracket) { + if self.peek_token().token == Token::LBracket { if self.dialect.supports_partiql() { + self.next_token(); ending_lbracket = true; break; } else { @@ -1473,7 +1473,7 @@ impl<'a> Parser<'a> { ObjectName(Self::exprs_to_idents(root, chain)?), AttachedToken(wildcard_token), )) - } else if matches!(self.peek_token().token, Token::LParen) { + } else if self.peek_token().token == Token::LParen { if !Self::is_all_ident(&root, &chain) { // consume LParen self.next_token(); @@ -3120,6 +3120,8 @@ impl<'a> Parser<'a> { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) { let mut chain = vec![]; + // back to LBracket + self.prev_token(); self.parse_multi_dim_subscript(&mut chain)?; self.parse_compound_field_access(expr, chain) } else if self.dialect.supports_partiql() { @@ -3223,17 +3225,12 @@ impl<'a> Parser<'a> { } /// Parse a multi-dimension array accessing like `[1:3][1][1]` - /// - /// Parser is right after the first `[` pub fn parse_multi_dim_subscript( &mut self, chain: &mut Vec, ) -> Result<(), ParserError> { - loop { + while self.consume_token(&Token::LBracket) { self.parse_subscript(chain)?; - if !self.consume_token(&Token::LBracket) { - break; - } } Ok(()) } @@ -3241,7 +3238,7 @@ impl<'a> Parser<'a> { /// Parses an array subscript like `[1:3]` /// /// Parser is right after `[` - pub fn parse_subscript(&mut self, chain: &mut Vec) -> Result<(), ParserError> { + fn parse_subscript(&mut self, chain: &mut Vec) -> Result<(), ParserError> { let subscript = self.parse_subscript_inner()?; chain.push(AccessExpr::Subscript(subscript)); Ok(()) From 94847d70a890b68a27fdc33aab83a68d0ac00c6e Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Mon, 16 Dec 2024 17:47:25 +0800 Subject: [PATCH 30/30] fix compile --- tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_common.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7aff10520..94bbdbde8 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1984,6 +1984,7 @@ fn parse_map_access_expr() { null_treatment: None, over: None, within_group: vec![], + uses_odbc_syntax: false, }), }), AccessExpr::Dot(Expr::Identifier(Ident::with_span( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 2adcaacad..008640e90 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10192,6 +10192,7 @@ fn parse_map_access_expr() { null_treatment: None, over: None, within_group: vec![], + uses_odbc_syntax: false, }), }), ],