Skip to content

Commit 9ffaea5

Browse files
committed
Allow setting the recursion limit for sql parsing
1 parent fb40506 commit 9ffaea5

File tree

5 files changed

+62
-6
lines changed

5 files changed

+62
-6
lines changed

datafusion/common/src/config.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ config_namespace! {
256256
/// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected
257257
/// and recorded in the logical plan nodes.
258258
pub collect_spans: bool, default = false
259+
260+
/// Specifies the recursion depth limit when parsing complex SQL Queries
261+
pub recursion_limit: usize, default = 50
259262
}
260263
}
261264

datafusion/core/src/execution/session_state.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -483,12 +483,21 @@ impl SessionState {
483483
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks."
484484
)
485485
})?;
486-
let mut statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
486+
487+
let recursion_limit = self.config.options().sql_parser.recursion_limit;
488+
489+
let mut statements = DFParser::parse_sql_with_dialect_limit(
490+
sql,
491+
dialect.as_ref(),
492+
recursion_limit,
493+
)?;
494+
487495
if statements.len() > 1 {
488496
return not_impl_err!(
489497
"The context currently only supports a single SQL statement"
490498
);
491499
}
500+
492501
let statement = statements.pop_front().ok_or_else(|| {
493502
plan_datafusion_err!("No SQL statements were provided in the query string")
494503
})?;
@@ -522,7 +531,12 @@ impl SessionState {
522531
)
523532
})?;
524533

525-
let expr = DFParser::parse_sql_into_expr_with_dialect(sql, dialect.as_ref())?;
534+
let recursion_limit = self.config.options().sql_parser.recursion_limit;
535+
let expr = DFParser::parse_sql_into_expr_with_dialect_limit(
536+
sql,
537+
dialect.as_ref(),
538+
recursion_limit,
539+
)?;
526540

527541
Ok(expr)
528542
}

datafusion/sql/src/parser.rs

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,9 @@ fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), ParserError> {
257257
Ok(())
258258
}
259259

260+
/// Default recursion limit for the parser; same as the default used by `sqlparser`
261+
const DEFAULT_RECURSION_LIMIT: usize = 50;
262+
260263
/// DataFusion SQL Parser based on [`sqlparser`]
261264
///
262265
/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`].
@@ -282,20 +285,31 @@ impl<'a> DFParser<'a> {
282285
pub fn new_with_dialect(
283286
sql: &str,
284287
dialect: &'a dyn Dialect,
288+
) -> Result<Self, ParserError> {
289+
DFParser::new_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT)
290+
}
291+
/// Create a new parser for the specified tokens with the
292+
/// specified dialect and recursion limit
293+
pub fn new_with_dialect_limit(
294+
sql: &str,
295+
dialect: &'a dyn Dialect,
296+
recursion_limit: usize,
285297
) -> Result<Self, ParserError> {
286298
let mut tokenizer = Tokenizer::new(dialect, sql);
287299
let tokens = tokenizer.tokenize_with_location()?;
288300

289301
Ok(DFParser {
290-
parser: Parser::new(dialect).with_tokens_with_locations(tokens),
302+
parser: Parser::new(dialect)
303+
.with_tokens_with_locations(tokens)
304+
.with_recursion_limit(recursion_limit),
291305
})
292306
}
293307

294308
/// Parse a SQL string into one or more [`Statement`]s using the
295309
/// [`GenericDialect`].
296310
pub fn parse_sql(sql: &str) -> Result<VecDeque<Statement>, ParserError> {
297311
let dialect = &GenericDialect {};
298-
DFParser::parse_sql_with_dialect(sql, dialect)
312+
DFParser::parse_sql_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT)
299313
}
300314

301315
/// Parse a SQL string and produce one or more [`Statement`]s with
@@ -304,7 +318,17 @@ impl<'a> DFParser<'a> {
304318
sql: &str,
305319
dialect: &dyn Dialect,
306320
) -> Result<VecDeque<Statement>, ParserError> {
307-
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
321+
DFParser::parse_sql_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT)
322+
}
323+
324+
/// Parse a SQL string and produce one or more [`Statement`]s with
325+
/// the specified dialect and recursion limit
326+
pub fn parse_sql_with_dialect_limit(
327+
sql: &str,
328+
dialect: &dyn Dialect,
329+
recursion_limit: usize,
330+
) -> Result<VecDeque<Statement>, ParserError> {
331+
let mut parser = DFParser::new_with_dialect_limit(sql, dialect, recursion_limit)?;
308332
let mut stmts = VecDeque::new();
309333
let mut expecting_statement_delimiter = false;
310334
loop {
@@ -331,7 +355,19 @@ impl<'a> DFParser<'a> {
331355
sql: &str,
332356
dialect: &dyn Dialect,
333357
) -> Result<ExprWithAlias, ParserError> {
334-
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
358+
DFParser::parse_sql_into_expr_with_dialect_limit(
359+
sql,
360+
dialect,
361+
DEFAULT_RECURSION_LIMIT,
362+
)
363+
}
364+
365+
pub fn parse_sql_into_expr_with_dialect_limit(
366+
sql: &str,
367+
dialect: &dyn Dialect,
368+
recursion_limit: usize,
369+
) -> Result<ExprWithAlias, ParserError> {
370+
let mut parser = DFParser::new_with_dialect_limit(sql, dialect, recursion_limit)?;
335371
parser.parse_expr()
336372
}
337373

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ datafusion.sql_parser.dialect generic
263263
datafusion.sql_parser.enable_ident_normalization true
264264
datafusion.sql_parser.enable_options_value_normalization false
265265
datafusion.sql_parser.parse_float_as_decimal false
266+
datafusion.sql_parser.recursion_limit 50
266267
datafusion.sql_parser.support_varchar_with_length true
267268

268269
# show all variables with verbose
@@ -359,6 +360,7 @@ datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusi
359360
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
360361
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
361362
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
363+
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
362364
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.
363365

364366
# show_variable_in_config_options

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
128128
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. |
129129
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
130130
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. |
131+
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |

0 commit comments

Comments
 (0)