diff --git a/src/ast/query.rs b/src/ast/query.rs
index 781157069..2ef456b1f 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -1410,6 +1410,31 @@ pub enum TableFactor {
         /// The alias for the table.
         alias: Option<TableAlias>,
     },
+    /// Snowflake's SEMANTIC_VIEW function for semantic models.
+    ///
+    ///
+    ///
+    /// ```sql
+    /// SELECT * FROM SEMANTIC_VIEW(
+    ///     tpch_analysis
+    ///     DIMENSIONS customer.customer_market_segment
+    ///     METRICS orders.order_average_value
+    /// );
+    /// ```
+    SemanticView {
+        /// The name of the semantic model
+        name: ObjectName,
+        /// List of dimensions or expression referring to dimensions (e.g. DATE_PART('year', col))
+        dimensions: Vec<Expr>,
+        /// List of metrics (references to objects like orders.value, value, orders.*)
+        metrics: Vec<ObjectName>,
+        /// List of facts or expressions referring to facts or dimensions.
+        facts: Vec<Expr>,
+        /// WHERE clause for filtering
+        where_clause: Option<Expr>,
+        /// The alias for the table
+        alias: Option<TableAlias>,
+    },
 }
 
 /// The table sample modifier options
@@ -2112,6 +2137,40 @@ impl fmt::Display for TableFactor {
                 }
                 Ok(())
             }
+            TableFactor::SemanticView {
+                name,
+                dimensions,
+                metrics,
+                facts,
+                where_clause,
+                alias,
+            } => {
+                write!(f, "SEMANTIC_VIEW({name}")?;
+
+                if !dimensions.is_empty() {
+                    write!(f, " DIMENSIONS {}", display_comma_separated(dimensions))?;
+                }
+
+                if !metrics.is_empty() {
+                    write!(f, " METRICS {}", display_comma_separated(metrics))?;
+                }
+
+                if !facts.is_empty() {
+                    write!(f, " FACTS {}", display_comma_separated(facts))?;
+                }
+
+                if let Some(where_clause) = where_clause {
+                    write!(f, " WHERE {where_clause}")?;
+                }
+
+                write!(f, ")")?;
+
+                if let Some(alias) = alias {
+                    write!(f, " AS {alias}")?;
+                }
+
+                Ok(())
+            }
         }
     }
 }
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 7f0175828..add6c3904 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -2044,6 +2044,23 @@ impl Spanned for TableFactor {
                     .chain(symbols.iter().map(|i| i.span()))
                     .chain(alias.as_ref().map(|i| i.span())),
             ),
+            TableFactor::SemanticView {
+                name,
+                dimensions,
+                metrics,
+                facts,
+                where_clause,
+                alias,
+            } => union_spans(
+                name.0
+                    .iter()
+                    .map(|i| i.span())
+                    .chain(dimensions.iter().map(|d| d.span()))
+                    .chain(metrics.iter().map(|m| m.span()))
+                    .chain(facts.iter().map(|f| f.span()))
+                    .chain(where_clause.as_ref().map(|e| e.span()))
+                    .chain(alias.as_ref().map(|a| a.span())),
+            ),
             TableFactor::OpenJsonTable { .. } => Span::empty(),
         }
     }
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 7cf9d4fd1..f91209722 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -1182,6 +1182,20 @@ pub trait Dialect: Debug + Any {
     fn supports_create_table_like_parenthesized(&self) -> bool {
         false
     }
+
+    /// Returns true if the dialect supports `SEMANTIC_VIEW()` table functions.
+    ///
+    /// ```sql
+    /// SELECT * FROM SEMANTIC_VIEW(
+    ///     model_name
+    ///     DIMENSIONS customer.name, customer.region
+    ///     METRICS orders.revenue, orders.count
+    ///     WHERE customer.active = true
+    /// )
+    /// ```
+    fn supports_semantic_view_table_factor(&self) -> bool {
+        false
+    }
 }
 
 /// This represents the operators for which precedence must be defined
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 46c72a799..07ef8317a 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -566,6 +566,10 @@ impl Dialect for SnowflakeDialect {
     fn supports_select_wildcard_exclude(&self) -> bool {
         true
     }
+
+    fn supports_semantic_view_table_factor(&self) -> bool {
+        true
+    }
 }
 
 // Peeks ahead to identify tokens that are expected after
diff --git a/src/keywords.rs b/src/keywords.rs
index 126871302..988f375c0 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -290,6 +290,7 @@ define_keywords!(
     DETACH,
     DETAIL,
     DETERMINISTIC,
+    DIMENSIONS,
     DIRECTORY,
     DISABLE,
     DISCARD,
@@ -359,6 +360,7 @@ define_keywords!(
     EXTERNAL,
     EXTERNAL_VOLUME,
     EXTRACT,
+    FACTS,
     FAIL,
     FAILOVER,
     FALSE,
@@ -566,6 +568,7 @@ define_keywords!(
     METADATA,
     METHOD,
     METRIC,
+    METRICS,
     MICROSECOND,
     MICROSECONDS,
     MILLENIUM,
@@ -828,6 +831,7 @@ define_keywords!(
     SECURITY,
     SEED,
     SELECT,
+    SEMANTIC_VIEW,
     SEMI,
     SENSITIVE,
     SEPARATOR,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 52db37b7d..6179b8345 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -4245,6 +4245,18 @@ impl<'a> Parser<'a> {
     /// not be efficient as it does a loop on the tokens with `peek_nth_token`
     /// each time.
     pub fn parse_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool {
+        self.keyword_with_tokens(expected, tokens, true)
+    }
+
+    /// Peeks to see if the current token is the `expected` keyword followed by specified tokens
+    /// without consuming them.
+    ///
+    /// See [Self::parse_keyword_with_tokens] for details.
+    pub(crate) fn peek_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool {
+        self.keyword_with_tokens(expected, tokens, false)
+    }
+
+    fn keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token], consume: bool) -> bool {
         match &self.peek_token_ref().token {
             Token::Word(w) if expected == w.keyword => {
                 for (idx, token) in tokens.iter().enumerate() {
@@ -4252,10 +4264,13 @@ impl<'a> Parser<'a> {
                         return false;
                     }
                 }
-                // consume all tokens
-                for _ in 0..(tokens.len() + 1) {
-                    self.advance_token();
+
+                if consume {
+                    for _ in 0..(tokens.len() + 1) {
+                        self.advance_token();
+                    }
                 }
+
                 true
             }
             _ => false,
@@ -13397,6 +13412,7 @@ impl<'a> Parser<'a> {
                         | TableFactor::Pivot { alias, .. }
                         | TableFactor::Unpivot { alias, .. }
                         | TableFactor::MatchRecognize { alias, .. }
+                        | TableFactor::SemanticView { alias, .. }
                         | TableFactor::NestedJoin { alias, .. } => {
                             // but not `FROM (mytable AS alias1) AS alias2`.
                             if let Some(inner_alias) = alias {
@@ -13511,6 +13527,10 @@ impl<'a> Parser<'a> {
         } else if self.parse_keyword_with_tokens(Keyword::XMLTABLE, &[Token::LParen]) {
             self.prev_token();
             self.parse_xml_table_factor()
+        } else if self.dialect.supports_semantic_view_table_factor()
+            && self.peek_keyword_with_tokens(Keyword::SEMANTIC_VIEW, &[Token::LParen])
+        {
+            self.parse_semantic_view_table_factor()
         } else {
             let name = self.parse_object_name(true)?;
 
@@ -13842,6 +13862,70 @@ impl<'a> Parser<'a> {
         Ok(XmlPassingClause { arguments })
     }
 
+    /// Parse a [TableFactor::SemanticView]
+    fn parse_semantic_view_table_factor(&mut self) -> Result<TableFactor, ParserError> {
+        self.expect_keyword(Keyword::SEMANTIC_VIEW)?;
+        self.expect_token(&Token::LParen)?;
+
+        let name = self.parse_object_name(true)?;
+
+        // Parse DIMENSIONS, METRICS, FACTS and WHERE clauses in flexible order
+        let mut dimensions = Vec::new();
+        let mut metrics = Vec::new();
+        let mut facts = Vec::new();
+        let mut where_clause = None;
+
+        while self.peek_token().token != Token::RParen {
+            if self.parse_keyword(Keyword::DIMENSIONS) {
+                if !dimensions.is_empty() {
+                    return Err(ParserError::ParserError(
+                        "DIMENSIONS clause can only be specified once".to_string(),
+                    ));
+                }
+                dimensions = self.parse_comma_separated(Parser::parse_expr)?;
+            } else if self.parse_keyword(Keyword::METRICS) {
+                if !metrics.is_empty() {
+                    return Err(ParserError::ParserError(
+                        "METRICS clause can only be specified once".to_string(),
+                    ));
+                }
+                metrics = self.parse_comma_separated(|parser| parser.parse_object_name(true))?;
+            } else if self.parse_keyword(Keyword::FACTS) {
+                if !facts.is_empty() {
+                    return Err(ParserError::ParserError(
+                        "FACTS clause can only be specified once".to_string(),
+                    ));
+                }
+                facts = self.parse_comma_separated(Parser::parse_expr)?;
+            } else if self.parse_keyword(Keyword::WHERE) {
+                if where_clause.is_some() {
+                    return Err(ParserError::ParserError(
+                        "WHERE clause can only be specified once".to_string(),
+                    ));
+                }
+                where_clause = Some(self.parse_expr()?);
+            } else {
+                return parser_err!(
+                    "Expected one of DIMENSIONS, METRICS, FACTS or WHERE",
+                    self.peek_token().span.start
+                )?;
+            }
+        }
+
+        self.expect_token(&Token::RParen)?;
+
+        let alias = self.maybe_parse_table_alias()?;
+
+        Ok(TableFactor::SemanticView {
+            name,
+            dimensions,
+            metrics,
+            facts,
+            where_clause,
+            alias,
+        })
+    }
+
     fn parse_match_recognize(&mut self, table: TableFactor) -> Result<TableFactor, ParserError> {
         self.expect_token(&Token::LParen)?;
 
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index ca2289616..f7a0b1d1f 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -16841,3 +16841,126 @@ fn parse_copy_options() {
         _ => unreachable!(),
     }
 }
+
+#[test]
+fn test_parse_semantic_view_table_factor() {
+    let dialects = all_dialects_where(|d| d.supports_semantic_view_table_factor());
+
+    let valid_sqls = [
+        ("SELECT * FROM SEMANTIC_VIEW(model)", None),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1, dim2)",
+            None,
+        ),
+        ("SELECT * FROM SEMANTIC_VIEW(a.b METRICS c.d, c.e)", None),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model FACTS fact1, fact2)",
+            None,
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model FACTS DATE_PART('year', col))",
+            None,
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1 METRICS met1)",
+            None,
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1 WHERE x > 0)",
+            None,
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1) AS sv",
+            None,
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS DATE_PART('year', col))",
+            None,
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model METRICS orders.col, orders.col2)",
+            None,
+        ),
+        // We can parse in any order but will always produce a result in a fixed order.
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model WHERE x > 0 DIMENSIONS dim1)",
+            Some("SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1 WHERE x > 0)"),
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model METRICS met1 DIMENSIONS dim1)",
+            Some("SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1 METRICS met1)"),
+        ),
+        (
+            "SELECT * FROM SEMANTIC_VIEW(model FACTS fact1 DIMENSIONS dim1)",
+            Some("SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1 FACTS fact1)"),
+        ),
+    ];
+
+    for (input_sql, expected_sql) in valid_sqls {
+        if let Some(expected) = expected_sql {
+            // Test that non-canonical order gets normalized
+            let parsed = dialects.parse_sql_statements(input_sql).unwrap();
+            let formatted = parsed[0].to_string();
+            assert_eq!(formatted, expected);
+        } else {
+            dialects.verified_stmt(input_sql);
+        }
+    }
+
+    let invalid_sqls = [
+        "SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1 INVALID inv1)",
+        "SELECT * FROM SEMANTIC_VIEW(model DIMENSIONS dim1 DIMENSIONS dim2)",
+        "SELECT * FROM SEMANTIC_VIEW(model METRICS SUM(met1.avg))",
+    ];
+
+    for sql in invalid_sqls {
+        let result = dialects.parse_sql_statements(sql);
+        assert!(result.is_err(), "Expected error for invalid SQL: {}", sql);
+    }
+
+    let ast_sql = r#"SELECT * FROM SEMANTIC_VIEW(
+        my_model
+        DIMENSIONS DATE_PART('year', date_col), region_name
+        METRICS orders.revenue, orders.count
+        WHERE active = true
+    ) AS model_alias"#;
+
+    let stmt = dialects.parse_sql_statements(ast_sql).unwrap();
+    match &stmt[0] {
+        Statement::Query(q) => {
+            if let SetExpr::Select(select) = q.body.as_ref() {
+                if let Some(TableWithJoins { relation, .. }) = select.from.first() {
+                    match relation {
+                        TableFactor::SemanticView {
+                            name,
+                            dimensions,
+                            metrics,
+                            facts,
+                            where_clause,
+                            alias,
+                        } => {
+                            assert_eq!(name.to_string(), "my_model");
+                            assert_eq!(dimensions.len(), 2);
+                            assert_eq!(dimensions[0].to_string(), "DATE_PART('year', date_col)");
+                            assert_eq!(dimensions[1].to_string(), "region_name");
+                            assert_eq!(metrics.len(), 2);
+                            assert_eq!(metrics[0].to_string(), "orders.revenue");
+                            assert_eq!(metrics[1].to_string(), "orders.count");
+                            assert!(facts.is_empty());
+                            assert!(where_clause.is_some());
+                            assert_eq!(where_clause.as_ref().unwrap().to_string(), "active = true");
+                            assert!(alias.is_some());
+                            assert_eq!(alias.as_ref().unwrap().name.value, "model_alias");
+                        }
+                        _ => panic!("Expected SemanticView table factor"),
+                    }
+                } else {
+                    panic!("Expected table in FROM clause");
+                }
+            } else {
+                panic!("Expected SELECT statement");
+            }
+        }
+        _ => panic!("Expected Query statement"),
+    }
+}