Skip to content
7 changes: 4 additions & 3 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ pub use self::ddl::{
pub use self::dml::{Delete, Insert};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
AfterMatchSkip, ConnectBy, Cse, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, ExprWithAliasAndOrderBy, Fetch, ForClause,
ForJson, ForXml, FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias,
IlikeSelectItem, InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint,
Expand All @@ -90,8 +90,9 @@ pub use self::query::{
TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod,
TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier,
TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind,
ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition,
XmlPassingArgument, XmlPassingClause, XmlTableColumn, XmlTableColumnOption,
ValueTableMode, Values, WildcardAdditionalOptions, With, WithExpression, WithFill,
XmlNamespaceDefinition, XmlPassingArgument, XmlPassingClause, XmlTableColumn,
XmlTableColumnOption,
};

pub use self::trigger::{
Expand Down
68 changes: 66 additions & 2 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ pub struct With {
/// Token for the "WITH" keyword
pub with_token: AttachedToken,
pub recursive: bool,
pub cte_tables: Vec<Cte>,
pub cte_tables: Vec<WithExpression>,
}

impl fmt::Display for With {
Expand Down Expand Up @@ -641,7 +641,71 @@ impl fmt::Display for CteAsMaterialized {
}
}

/// A single item of a `WITH` clause: either a common table expression
/// ([`Cte`]) or a common scalar expression ([`Cse`]).
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum WithExpression {
/// Common table expression.
Cte(Cte),
/// Common scalar expression.
Cse(Cse),
}

impl WithExpression {
    /// Returns the wrapped [`Cte`] when this item is a common table
    /// expression, and `None` when it is a common scalar expression.
    pub fn cte(&self) -> Option<&Cte> {
        if let Self::Cte(table_expr) = self {
            Some(table_expr)
        } else {
            None
        }
    }

    /// Returns the wrapped [`Cse`] when this item is a common scalar
    /// expression, and `None` when it is a common table expression.
    pub fn cse(&self) -> Option<&Cse> {
        if let Self::Cse(scalar_expr) = self {
            Some(scalar_expr)
        } else {
            None
        }
    }
}

impl fmt::Display for WithExpression {
    /// Formats the wrapped item with its own `Display` implementation,
    /// handing over the caller's formatter as-is.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Cse(scalar_expr) => fmt::Display::fmt(scalar_expr, f),
            Self::Cte(table_expr) => fmt::Display::fmt(table_expr, f),
        }
    }
}

/// A common scalar expression (CSE).
///
/// ```sql
/// [WITH] <expr> AS <ident> [,]
/// ```
///
/// See <https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions>
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Cse {
/// The expression written before `AS`.
pub expr: Expr,
/// The identifier written after `AS`, naming the expression.
pub ident: Ident,
}

impl fmt::Display for Cse {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.expr.fmt(f)?;
f.write_str(" AS ")?;
self.ident.fmt(f)?;
Ok(())
}
}
/// A common table expression (CTE).
///
/// ```sql
/// [WITH] <alias> [(col1, col2, ...)] AS <materialized> ( <query> ) [,]
/// ```
///
/// The names in the column list before `AS`, when specified, replace the names
/// of the columns returned by the query. The parser does not validate that the
/// number of columns in the query matches the number of names in the column list.
Expand Down
23 changes: 22 additions & 1 deletion src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,23 @@ impl Spanned for With {
}
}

impl Spanned for super::query::WithExpression {
    /// Returns the span of whichever item (CTE or CSE) this value wraps.
    fn span(&self) -> Span {
        match self {
            Self::Cse(scalar_expr) => scalar_expr.span(),
            Self::Cte(table_expr) => table_expr.span(),
        }
    }
}

impl Spanned for super::query::Cse {
    /// Returns the union of the expression's span and the identifier's span.
    fn span(&self) -> Span {
        // Destructure every field so that adding a field to `Cse` forces this
        // implementation to be revisited.
        let super::query::Cse { expr, ident } = self;

        let expr_span = core::iter::once(expr.span());
        let ident_span = core::iter::once(ident.span);
        union_spans(expr_span.chain(ident_span))
    }
}

impl Spanned for Cte {
fn span(&self) -> Span {
let Cte {
Expand Down Expand Up @@ -2560,7 +2577,11 @@ pub mod tests {

let query = test.0.parse_query().unwrap();
let cte_span = query.clone().with.unwrap().cte_tables[0].span();
let cte_query_span = query.clone().with.unwrap().cte_tables[0].query.span();
let cte_query_span = query.clone().with.unwrap().cte_tables[0]
.cte()
.unwrap()
.query
.span();
let body_span = query.body.span();

// the WITH keyword is part of the query
Expand Down
5 changes: 5 additions & 0 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ impl Dialect for ClickHouseDialect {
true
}

/// ClickHouse supports common scalar expressions (`<expr> AS <ident>`) in a `WITH` clause.
///
/// See <https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions>.
fn supports_common_scalar_expressions(&self) -> bool {
true
}

/// See <https://clickhouse.com/docs/en/sql-reference/statements/select/order-by>
fn supports_order_by_all(&self) -> bool {
true
Expand Down
14 changes: 14 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,20 @@ pub trait Dialect: Debug + Any {
false
}

/// Returns true if the dialect supports Common Scalar Expressions (CSEs),
/// i.e. `<expr> AS <ident>` items, in the `WITH` clause of a `SELECT`.
///
/// The default implementation returns `false`.
///
/// For example:
/// ```sql
/// WITH
/// toDate('2000-01-01') AS start_date
/// SELECT * FROM tbl WHERE col1 > start_date;
/// ```
///
/// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)
fn supports_common_scalar_expressions(&self) -> bool {
false
}
Comment on lines +609 to +611
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also add this to the generic dialect? I could not work out which expressions should be supported in the generic dialect and which should not.

For example, supports_select_expr_star isn't listed as supported. Neither is supports_select_exclude.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general we try to add support for new syntax to the generic dialect as long as the syntax doesn't conflict with existing syntax support on the dialect

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think CSEs exist only in ClickHouse. Does it make sense to include this in the generic dialect? If yes, then:

as long as the syntax doesn't conflict with existing syntax support on the dialect

It's difficult for me to answer this question - I don't know whether there can be a conflict. But since it's only a ClickHouse feature, I'd rather not include it in the generic dialect. I won't oppose it either, though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, if there's a conflict that would show up in the tests, so it should be a matter of flagging it to true in the dialect and then changing the tests to use all_dialects_where(|d| d.supports_common_scalar_expressions()) instead of clickhouse()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pravic just double checking the status of this PR, if you would have time to address this comment?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@iffyio No, I haven't touched this bit so far. If you can, I would appreciate you changing this.


/// Return true if the dialect supports specifying multiple options
/// in a `CREATE TABLE` statement for the structure of the new table. For example:
/// `CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a`
Expand Down
25 changes: 23 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11801,7 +11801,7 @@ impl<'a> Parser<'a> {
Some(With {
with_token: with_token.clone().into(),
recursive: self.parse_keyword(Keyword::RECURSIVE),
cte_tables: self.parse_comma_separated(Parser::parse_cte)?,
cte_tables: self.parse_comma_separated(Parser::parse_with_expression)?,
})
} else {
None
Expand Down Expand Up @@ -12260,7 +12260,28 @@ impl<'a> Parser<'a> {
})
}

/// Parse a single item of a `WITH` clause.
///
/// When the dialect reports `supports_common_scalar_expressions`, a [`Cse`]
/// is attempted first; if that attempt yields nothing, or the dialect has no
/// such support, the item is parsed as a [`Cte`].
pub fn parse_with_expression(&mut self) -> Result<WithExpression, ParserError> {
    if self.dialect.supports_common_scalar_expressions() {
        let scalar = self.maybe_parse(|parser| parser.parse_cse())?;
        if let Some(cse) = scalar {
            return Ok(WithExpression::Cse(cse));
        }
    }

    // Either CSEs are not enabled for this dialect or the CSE attempt did
    // not produce a value: fall back to a regular CTE.
    self.parse_cte().map(WithExpression::Cte)
}

/// Parse a [`Cse`] in a `WITH` clause: an expression, the `AS` keyword, then
/// an identifier.
pub fn parse_cse(&mut self) -> Result<Cse, ParserError> {
    let expr = self.parse_expr()?;
    self.expect_keyword_is(Keyword::AS)?;
    self.parse_identifier().map(|ident| Cse { expr, ident })
}

/// Parse a [`Cte`] in a `WITH` clause.
pub fn parse_cte(&mut self) -> Result<Cte, ParserError> {
let name = self.parse_identifier()?;

Expand Down
59 changes: 59 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1729,6 +1729,65 @@ fn test_parse_not_null_in_column_options() {
);
}

#[test]
fn parse_cse() {
    /// Asserts that `sql` is rejected by the ClickHouse dialect and that the
    /// resulting error message contains `expected`.
    ///
    /// `Result::expect_err(msg)` only uses `msg` as the panic text when the
    /// result is `Ok`; it never compares `msg` with the actual error, so the
    /// message has to be checked explicitly.
    fn assert_parse_error(sql: &str, expected: &str) {
        let err = clickhouse()
            .parse_sql_statements(sql)
            .expect_err("statement should have been rejected");
        let message = err.to_string();
        assert!(
            message.contains(expected),
            "unexpected error for {:?}: {}",
            sql,
            message
        );
    }

    // A plain CTE must still parse when CSE support is enabled.
    clickhouse().verified_stmt("WITH x AS (SELECT 1) UPDATE t SET bar = (SELECT * FROM x)");

    let with = concat!(
        "WITH",
        " toIntervalSecond(300) AS bucket_size,",
        " toDateTime64(1735751460, 9) AS start_time,",
        " toDateTime64(1735755060, 9) AS end_time ",
        "SELECT",
        " toStartOfInterval(EventTime, bucket_size) AS bucket,",
        " count() AS count ",
        "FROM logs",
    );
    clickhouse().verified_query(with);

    // CSEs and CTEs may be mixed within a single `WITH` clause.
    let mixed = concat!(
        "WITH",
        " toDate(now()) AS today,",
        " tbl (c) AS (SELECT toDate('2000-01-01')) ",
        "SELECT",
        " * ",
        "FROM tbl ",
        "WHERE c < today"
    );
    clickhouse().verified_query(mixed);

    // valid
    clickhouse()
        .parse_sql_statements("WITH foo() AS bar SELECT 1")
        .unwrap();

    // ClickHouse allows these, but not sqlparser.
    // See https://fiddle.clickhouse.com/1292850a-31e4-49c7-a309-79bc47a273a2:
    // it is unclear whether accepting them is a feature or a bug in the
    // ClickHouse syntax parser. Since these examples don't make much sense,
    // they are deliberately left as rejected.
    assert_parse_error("WITH foo, bar SELECT 1", "Expected: AS, found: ,");
    assert_parse_error("WITH foo(), bar SELECT 1", "Expected: identifier, found: )");

    // invalid
    assert_parse_error("WITH foo bar SELECT 1", "Expected: ");
    assert_parse_error("WITH foo() bar SELECT 1", "Expected: ");
    assert_parse_error("WITH foo() bar() SELECT 1", "Expected: ");
    assert_parse_error("WITH foo() AS bar() SELECT 1", "Expected: ");
}

/// Builds a [`TestedDialects`] containing only [`ClickHouseDialect`].
fn clickhouse() -> TestedDialects {
TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
}
Expand Down
13 changes: 9 additions & 4 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7537,7 +7537,7 @@ fn parse_ctes() {

fn assert_ctes_in_select(expected: &[&str], sel: &Query) {
for (i, exp) in expected.iter().enumerate() {
let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i];
let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i].cte().unwrap();
assert_eq!(*exp, query.to_string());
assert_eq!(
if i == 0 {
Expand Down Expand Up @@ -7580,7 +7580,10 @@ fn parse_ctes() {
// CTE in a CTE...
let sql = &format!("WITH outer_cte AS ({with}) SELECT * FROM outer_cte");
let select = verified_query(sql);
assert_ctes_in_select(&cte_sqls, &only(&select.with.unwrap().cte_tables).query);
assert_ctes_in_select(
&cte_sqls,
&only(&select.with.unwrap().cte_tables).cte().unwrap().query,
);
}

#[test]
Expand All @@ -7598,6 +7601,8 @@ fn parse_cte_renamed_columns() {
.cte_tables
.first()
.unwrap()
.cte()
.unwrap()
.alias
.columns
);
Expand Down Expand Up @@ -7628,7 +7633,7 @@ fn parse_recursive_cte() {
materialized: None,
closing_paren_token: AttachedToken::empty(),
};
assert_eq!(with.cte_tables.first().unwrap(), &expected);
assert_eq!(with.cte_tables.first().unwrap().cte().unwrap(), &expected);
}

#[test]
Expand Down Expand Up @@ -17105,7 +17110,7 @@ fn test_parse_semantic_view_table_factor() {
}

let ast_sql = r#"SELECT * FROM SEMANTIC_VIEW(
my_model
my_model
DIMENSIONS DATE_PART('year', date_col), region_name
METRICS orders.revenue, orders.count
WHERE active = true
Expand Down