Skip to content

Commit 15dbe2e

Browse files
committed
Allow setting the recursion limit for sql parsing
1 parent fb40506 commit 15dbe2e

File tree

6 files changed

+96
-36
lines changed

6 files changed

+96
-36
lines changed

datafusion-examples/examples/sql_dialect.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use std::fmt::Display;
1919

2020
use datafusion::error::Result;
2121
use datafusion::sql::{
22-
parser::{CopyToSource, CopyToStatement, DFParser, Statement},
22+
parser::{CopyToSource, CopyToStatement, DFParser, DFParserBuilder, Statement},
2323
sqlparser::{keywords::Keyword, parser::ParserError, tokenizer::Token},
2424
};
2525

@@ -46,9 +46,9 @@ struct MyParser<'a> {
4646
df_parser: DFParser<'a>,
4747
}
4848

49-
impl MyParser<'_> {
50-
fn new(sql: &str) -> Result<Self> {
51-
let df_parser = DFParser::new(sql)?;
49+
impl<'a> MyParser<'a> {
50+
fn new(sql: &'a str) -> Result<Self> {
51+
let df_parser = DFParserBuilder::new(sql).build()?;
5252
Ok(Self { df_parser })
5353
}
5454

datafusion/common/src/config.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ config_namespace! {
256256
/// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected
257257
/// and recorded in the logical plan nodes.
258258
pub collect_spans: bool, default = false
259+
260+
/// Specifies the recursion depth limit when parsing complex SQL Queries
261+
pub recursion_limit: usize, default = 50
259262
}
260263
}
261264

datafusion/core/src/execution/session_state.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
6868
use datafusion_physical_optimizer::optimizer::PhysicalOptimizer;
6969
use datafusion_physical_optimizer::PhysicalOptimizerRule;
7070
use datafusion_physical_plan::ExecutionPlan;
71-
use datafusion_sql::parser::{DFParser, Statement};
71+
use datafusion_sql::parser::{DFParserBuilder, Statement};
7272
use datafusion_sql::planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel};
7373

7474
use async_trait::async_trait;
@@ -483,12 +483,20 @@ impl SessionState {
483483
MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks."
484484
)
485485
})?;
486-
let mut statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;
486+
487+
let recursion_limit = self.config.options().sql_parser.recursion_limit;
488+
489+
let mut statements = DFParserBuilder::new(sql)
490+
.with_dialect(dialect.as_ref())
491+
.with_recursion_limit(recursion_limit)
492+
.parse_statements()?;
493+
487494
if statements.len() > 1 {
488495
return not_impl_err!(
489496
"The context currently only supports a single SQL statement"
490497
);
491498
}
499+
492500
let statement = statements.pop_front().ok_or_else(|| {
493501
plan_datafusion_err!("No SQL statements were provided in the query string")
494502
})?;
@@ -522,7 +530,11 @@ impl SessionState {
522530
)
523531
})?;
524532

525-
let expr = DFParser::parse_sql_into_expr_with_dialect(sql, dialect.as_ref())?;
533+
let recursion_limit = self.config.options().sql_parser.recursion_limit;
534+
let expr = DFParserBuilder::new(sql)
535+
.with_dialect(dialect.as_ref())
536+
.with_recursion_limit(recursion_limit)
537+
.parse_expr()?;
526538

527539
Ok(expr)
528540
}

datafusion/sql/src/parser.rs

Lines changed: 71 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -269,33 +269,69 @@ pub struct DFParser<'a> {
269269
pub parser: Parser<'a>,
270270
}
271271

272-
impl<'a> DFParser<'a> {
273-
/// Create a new parser for the specified tokens using the
272+
/// Default recursion limit for SQL parsing; same value as the default used by `sqlparser`
273+
const DEFAULT_RECURSION_LIMIT: usize = 50;
274+
const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
275+
276+
pub struct DFParserBuilder<'a> {
277+
pub sql: &'a str,
278+
pub dialect: &'a dyn Dialect,
279+
pub recursion_limit: usize,
280+
}
281+
282+
impl<'a> DFParserBuilder<'a> {
283+
/// Create a new parser builder for the specified SQL string, defaulting to the
274284
/// [`GenericDialect`].
275-
pub fn new(sql: &str) -> Result<Self, ParserError> {
276-
let dialect = &GenericDialect {};
277-
DFParser::new_with_dialect(sql, dialect)
285+
pub fn new(sql: &'a str) -> Self {
286+
Self {
287+
sql,
288+
dialect: &DEFAULT_DIALECT,
289+
recursion_limit: DEFAULT_RECURSION_LIMIT,
290+
}
278291
}
279292

280-
/// Create a new parser for the specified tokens with the
281-
/// specified dialect.
282-
pub fn new_with_dialect(
283-
sql: &str,
284-
dialect: &'a dyn Dialect,
285-
) -> Result<Self, ParserError> {
286-
let mut tokenizer = Tokenizer::new(dialect, sql);
293+
/// Adjust the parser builder's dialect
294+
pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
295+
self.dialect = dialect;
296+
self
297+
}
298+
299+
/// Adjust the recursion limit of sql parsing
300+
pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
301+
self.recursion_limit = recursion_limit;
302+
303+
self
304+
}
305+
306+
pub fn parse_statements(self) -> Result<VecDeque<Statement>, ParserError> {
307+
let mut parser = self.build()?;
308+
parser.parse_statements()
309+
}
310+
311+
pub fn parse_expr(self) -> Result<ExprWithAlias, ParserError> {
312+
let mut parser = self.build()?;
313+
parser.parse_expr()
314+
}
315+
316+
pub fn build(self) -> Result<DFParser<'a>, ParserError> {
317+
let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
287318
let tokens = tokenizer.tokenize_with_location()?;
288319

289320
Ok(DFParser {
290-
parser: Parser::new(dialect).with_tokens_with_locations(tokens),
321+
parser: Parser::new(self.dialect)
322+
.with_tokens_with_locations(tokens)
323+
.with_recursion_limit(self.recursion_limit),
291324
})
292325
}
326+
}
293327

328+
impl<'a> DFParser<'a> {
294329
/// Parse a SQL string into one or more [`Statement`]s using the
295330
/// [`GenericDialect`].
296-
pub fn parse_sql(sql: &str) -> Result<VecDeque<Statement>, ParserError> {
297-
let dialect = &GenericDialect {};
298-
DFParser::parse_sql_with_dialect(sql, dialect)
331+
pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, ParserError> {
332+
let mut parser = DFParserBuilder::new(sql).build()?;
333+
334+
parser.parse_statements()
299335
}
300336

301337
/// Parse a SQL string and produce one or more [`Statement`]s with
@@ -304,37 +340,43 @@ impl<'a> DFParser<'a> {
304340
sql: &str,
305341
dialect: &dyn Dialect,
306342
) -> Result<VecDeque<Statement>, ParserError> {
307-
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
343+
let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
344+
parser.parse_statements()
345+
}
346+
347+
pub fn parse_sql_into_expr_with_dialect(
348+
sql: &str,
349+
dialect: &dyn Dialect,
350+
) -> Result<ExprWithAlias, ParserError> {
351+
let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?;
352+
353+
parser.parse_expr()
354+
}
355+
356+
/// Parse the SQL held by this parser into a sequence of [`Statement`]s
357+
pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, ParserError> {
308358
let mut stmts = VecDeque::new();
309359
let mut expecting_statement_delimiter = false;
310360
loop {
311361
// ignore empty statements (between successive statement delimiters)
312-
while parser.parser.consume_token(&Token::SemiColon) {
362+
while self.parser.consume_token(&Token::SemiColon) {
313363
expecting_statement_delimiter = false;
314364
}
315365

316-
if parser.parser.peek_token() == Token::EOF {
366+
if self.parser.peek_token() == Token::EOF {
317367
break;
318368
}
319369
if expecting_statement_delimiter {
320-
return parser.expected("end of statement", parser.parser.peek_token());
370+
return self.expected("end of statement", self.parser.peek_token());
321371
}
322372

323-
let statement = parser.parse_statement()?;
373+
let statement = self.parse_statement()?;
324374
stmts.push_back(statement);
325375
expecting_statement_delimiter = true;
326376
}
327377
Ok(stmts)
328378
}
329379

330-
pub fn parse_sql_into_expr_with_dialect(
331-
sql: &str,
332-
dialect: &dyn Dialect,
333-
) -> Result<ExprWithAlias, ParserError> {
334-
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
335-
parser.parse_expr()
336-
}
337-
338380
/// Report an unexpected token
339381
fn expected<T>(
340382
&self,

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ datafusion.sql_parser.dialect generic
263263
datafusion.sql_parser.enable_ident_normalization true
264264
datafusion.sql_parser.enable_options_value_normalization false
265265
datafusion.sql_parser.parse_float_as_decimal false
266+
datafusion.sql_parser.recursion_limit 50
266267
datafusion.sql_parser.support_varchar_with_length true
267268

268269
# show all variables with verbose
@@ -359,6 +360,7 @@ datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusi
359360
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
360361
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
361362
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
363+
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
362364
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.
363365

364366
# show_variable_in_config_options

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
128128
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. |
129129
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
130130
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. |
131+
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |

0 commit comments

Comments
 (0)