Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions datafusion-examples/examples/sql_dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::fmt::Display;

use datafusion::error::Result;
use datafusion::sql::{
parser::{CopyToSource, CopyToStatement, DFParser, Statement},
parser::{CopyToSource, CopyToStatement, DFParser, DFParserBuilder, Statement},
sqlparser::{keywords::Keyword, parser::ParserError, tokenizer::Token},
};

Expand All @@ -46,9 +46,9 @@ struct MyParser<'a> {
df_parser: DFParser<'a>,
}

impl MyParser<'_> {
fn new(sql: &str) -> Result<Self> {
let df_parser = DFParser::new(sql)?;
impl<'a> MyParser<'a> {
fn new(sql: &'a str) -> Result<Self> {
let df_parser = DFParserBuilder::new(sql).build()?;
Ok(Self { df_parser })
}

Expand Down
3 changes: 3 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ config_namespace! {
/// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected
/// and recorded in the logical plan nodes.
pub collect_spans: bool, default = false

/// Specifies the recursion depth limit when parsing complex SQL Queries
pub recursion_limit: usize, default = 50
}
}

Expand Down
18 changes: 15 additions & 3 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
use datafusion_physical_optimizer::optimizer::PhysicalOptimizer;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::ExecutionPlan;
use datafusion_sql::parser::{DFParser, Statement};
use datafusion_sql::parser::{DFParserBuilder, Statement};
use datafusion_sql::planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel};

use async_trait::async_trait;
Expand Down Expand Up @@ -483,12 +483,20 @@ impl SessionState {
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks."
)
})?;
let mut statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;

let recursion_limit = self.config.options().sql_parser.recursion_limit;

let mut statements = DFParserBuilder::new(sql)
.with_dialect(dialect.as_ref())
.with_recursion_limit(recursion_limit)
.parse_statements()?;

if statements.len() > 1 {
return not_impl_err!(
"The context currently only supports a single SQL statement"
);
}

let statement = statements.pop_front().ok_or_else(|| {
plan_datafusion_err!("No SQL statements were provided in the query string")
})?;
Expand Down Expand Up @@ -522,7 +530,11 @@ impl SessionState {
)
})?;

let expr = DFParser::parse_sql_into_expr_with_dialect(sql, dialect.as_ref())?;
let recursion_limit = self.config.options().sql_parser.recursion_limit;
let expr = DFParserBuilder::new(sql)
.with_dialect(dialect.as_ref())
.with_recursion_limit(recursion_limit)
.parse_expr()?;

Ok(expr)
}
Expand Down
113 changes: 84 additions & 29 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,33 +269,69 @@ pub struct DFParser<'a> {
pub parser: Parser<'a>,
}

impl<'a> DFParser<'a> {
/// Create a new parser for the specified tokens using the
/// Same as `sqlparser`
const DEFAULT_RECURSION_LIMIT: usize = 50;
const DEFAULT_DIALECT: GenericDialect = GenericDialect {};

/// Builder for a [`DFParser`].
///
/// Collects the SQL text, the dialect and the recursion limit, which are
/// then used to construct the parser.
pub struct DFParserBuilder<'a> {
/// The SQL string to parse
pub sql: &'a str,
/// The dialect used to tokenize and parse `sql`
pub dialect: &'a dyn Dialect,
/// The recursion limit passed on to the underlying parser
pub recursion_limit: usize,
}

impl<'a> DFParserBuilder<'a> {
/// Create a new parser builder for the specified tokens using the
/// [`GenericDialect`].
pub fn new(sql: &str) -> Result<Self, ParserError> {
let dialect = &GenericDialect {};
DFParser::new_with_dialect(sql, dialect)
pub fn new(sql: &'a str) -> Self {
Self {
sql,
dialect: &DEFAULT_DIALECT,
recursion_limit: DEFAULT_RECURSION_LIMIT,
}
}

/// Create a new parser for the specified tokens with the
/// specified dialect.
pub fn new_with_dialect(
sql: &str,
dialect: &'a dyn Dialect,
) -> Result<Self, ParserError> {
let mut tokenizer = Tokenizer::new(dialect, sql);
/// Adjust the parser builder's dialect
pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
self.dialect = dialect;
self
}

/// Adjust the recursion limit of sql parsing
pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
self.recursion_limit = recursion_limit;

self
}

/// Build the parser from this builder and parse the SQL text into a
/// sequence of [`Statement`]s.
///
/// # Errors
///
/// Returns a [`ParserError`] if building the parser or parsing the
/// statements fails.
pub fn parse_statements(self) -> Result<VecDeque<Statement>, ParserError> {
    self.build()?.parse_statements()
}

/// Build the parser from this builder and parse the SQL text into an
/// [`ExprWithAlias`].
///
/// # Errors
///
/// Returns a [`ParserError`] if building the parser or parsing the
/// expression fails.
pub fn parse_expr(self) -> Result<ExprWithAlias, ParserError> {
    self.build()?.parse_expr()
}

pub fn build(self) -> Result<DFParser<'a>, ParserError> {
let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
let tokens = tokenizer.tokenize_with_location()?;

Ok(DFParser {
parser: Parser::new(dialect).with_tokens_with_locations(tokens),
parser: Parser::new(self.dialect)
.with_tokens_with_locations(tokens)
.with_recursion_limit(self.recursion_limit),
})
}
}

impl<'a> DFParser<'a> {
/// Parse a SQL string into one or more [`Statement`]s using the
/// [`GenericDialect`].
pub fn parse_sql(sql: &str) -> Result<VecDeque<Statement>, ParserError> {
let dialect = &GenericDialect {};
DFParser::parse_sql_with_dialect(sql, dialect)
pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, ParserError> {
let mut parser = DFParserBuilder::new(sql).build()?;

parser.parse_statements()
}

/// Parse a SQL string and produce one or more [`Statement`]s with
Expand All @@ -304,37 +340,43 @@ impl<'a> DFParser<'a> {
sql: &str,
dialect: &dyn Dialect,
) -> Result<VecDeque<Statement>, ParserError> {
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
parser.parse_statements()
}

/// Parse a SQL string into an [`ExprWithAlias`] using the specified
/// dialect.
///
/// # Errors
///
/// Returns a [`ParserError`] if building the parser or parsing the
/// expression fails.
pub fn parse_sql_into_expr_with_dialect(
    sql: &str,
    dialect: &dyn Dialect,
) -> Result<ExprWithAlias, ParserError> {
    DFParserBuilder::new(sql)
        .with_dialect(dialect)
        .build()?
        .parse_expr()
}

/// Parse a SQL string into one or more [`Statement`]s
pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, ParserError> {
let mut stmts = VecDeque::new();
let mut expecting_statement_delimiter = false;
loop {
// ignore empty statements (between successive statement delimiters)
while parser.parser.consume_token(&Token::SemiColon) {
while self.parser.consume_token(&Token::SemiColon) {
expecting_statement_delimiter = false;
}

if parser.parser.peek_token() == Token::EOF {
if self.parser.peek_token() == Token::EOF {
break;
}
if expecting_statement_delimiter {
return parser.expected("end of statement", parser.parser.peek_token());
return self.expected("end of statement", self.parser.peek_token());
}

let statement = parser.parse_statement()?;
let statement = self.parse_statement()?;
stmts.push_back(statement);
expecting_statement_delimiter = true;
}
Ok(stmts)
}

pub fn parse_sql_into_expr_with_dialect(
sql: &str,
dialect: &dyn Dialect,
) -> Result<ExprWithAlias, ParserError> {
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
parser.parse_expr()
}

/// Report an unexpected token
fn expected<T>(
&self,
Expand Down Expand Up @@ -1613,4 +1655,17 @@ mod tests {
fn verified_stmt(sql: &str) -> Statement {
one_statement_parses_to(sql, sql)
}

#[test]
/// Verifies that the recursion limit is applied when parsing SQL.
/// Binary expressions (e.g. `AND` / `OR`) are an easy way to make the
/// parser recurse.
fn test_recursion_limit() {
    let sql = "SELECT 1 OR 2";

    // With the builder's default limit the query parses.
    let default_limit_result = DFParserBuilder::new(sql).parse_statements();
    assert!(default_limit_result.is_ok());

    // With a limit of 1 the same query is rejected.
    let tiny_limit_result = DFParserBuilder::new(sql)
        .with_recursion_limit(1)
        .parse_statements();
    assert!(tiny_limit_result.is_err());
}
}
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.recursion_limit 50
datafusion.sql_parser.support_varchar_with_length true

# show all variables with verbose
Expand Down Expand Up @@ -359,6 +360,7 @@ datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusi
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

# show_variable_in_config_options
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. |
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |