Skip to content

Commit eb5eb57

Browse files
committed
add ability to customize tokens in parser
1 parent 1636898 commit eb5eb57

File tree

1 file changed

+72
-15
lines changed

1 file changed

+72
-15
lines changed

datafusion/sql/src/parser.rs

Lines changed: 72 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -363,28 +363,49 @@ const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
363363
/// # Ok(())
364364
/// # }
365365
/// ```
366-
pub struct DFParserBuilder<'a> {
367-
/// The SQL string to parse
368-
sql: &'a str,
366+
pub struct DFParserBuilder<'a, 'b> {
367+
/// Parser input: either raw SQL or tokens
368+
input: ParserInput<'a>,
369369
/// The Dialect to use (defaults to [`GenericDialect`])
370-
dialect: &'a dyn Dialect,
370+
dialect: &'b dyn Dialect,
371371
/// The recursion limit while parsing
372372
recursion_limit: usize,
373373
}
374374

375-
impl<'a> DFParserBuilder<'a> {
375+
/// Describes a possible input for the parser
376+
pub enum ParserInput<'a> {
377+
/// Raw SQL. Tokenization will be performed automatically as a
378+
/// part of [`DFParserBuilder::build`]
379+
Sql(&'a str),
380+
/// Already-tokenized input, used as-is without running the tokenizer
381+
Tokens(Vec<TokenWithSpan>),
382+
}
383+
384+
impl<'a> From<&'a str> for ParserInput<'a> {
385+
fn from(sql: &'a str) -> Self {
386+
Self::Sql(sql)
387+
}
388+
}
389+
390+
impl From<Vec<TokenWithSpan>> for ParserInput<'static> {
391+
fn from(tokens: Vec<TokenWithSpan>) -> Self {
392+
Self::Tokens(tokens)
393+
}
394+
}
395+
396+
impl<'a, 'b> DFParserBuilder<'a, 'b> {
376397
/// Create a new parser builder for the specified input (SQL or tokens) using the
377398
/// [`GenericDialect`].
378-
pub fn new(sql: &'a str) -> Self {
399+
pub fn new(input: impl Into<ParserInput<'a>>) -> Self {
379400
Self {
380-
sql,
401+
input: input.into(),
381402
dialect: &DEFAULT_DIALECT,
382403
recursion_limit: DEFAULT_RECURSION_LIMIT,
383404
}
384405
}
385406

386407
/// Adjust the parser builder's dialect. Defaults to [`GenericDialect`]
387-
pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
408+
pub fn with_dialect(mut self, dialect: &'b dyn Dialect) -> Self {
388409
self.dialect = dialect;
389410
self
390411
}
@@ -395,12 +416,18 @@ impl<'a> DFParserBuilder<'a> {
395416
self
396417
}
397418

398-
pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
399-
let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
400-
// Convert TokenizerError -> ParserError
401-
let tokens = tokenizer
402-
.tokenize_with_location()
403-
.map_err(ParserError::from)?;
419+
/// Build the resulting parser, tokenizing the input first if it is raw SQL
420+
pub fn build(self) -> Result<DFParser<'b>, DataFusionError> {
421+
let tokens = match self.input {
422+
ParserInput::Tokens(tokens) => tokens,
423+
ParserInput::Sql(sql) => {
424+
let mut tokenizer = Tokenizer::new(self.dialect, sql);
425+
// Convert TokenizerError -> ParserError
426+
tokenizer
427+
.tokenize_with_location()
428+
.map_err(ParserError::from)?
429+
}
430+
};
404431

405432
Ok(DFParser {
406433
parser: Parser::new(self.dialect)
@@ -1162,7 +1189,7 @@ mod tests {
11621189
BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
11631190
};
11641191
use sqlparser::dialect::SnowflakeDialect;
1165-
use sqlparser::tokenizer::Span;
1192+
use sqlparser::tokenizer::{Location, Span, Whitespace};
11661193

11671194
fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
11681195
let statements = DFParser::parse_sql(sql)?;
@@ -2068,6 +2095,36 @@ mod tests {
20682095
);
20692096
}
20702097

2098+
#[test]
2099+
fn test_custom_tokens() {
2100+
// Span mock.
2101+
let span = Span {
2102+
start: Location { line: 0, column: 0 },
2103+
end: Location { line: 0, column: 0 },
2104+
};
2105+
let tokens = vec![
2106+
TokenWithSpan {
2107+
token: Token::make_keyword("SELECT"),
2108+
span,
2109+
},
2110+
TokenWithSpan {
2111+
token: Token::Whitespace(Whitespace::Space),
2112+
span,
2113+
},
2114+
TokenWithSpan {
2115+
token: Token::Placeholder("1".to_string()),
2116+
span,
2117+
},
2118+
];
2119+
2120+
let statements = DFParserBuilder::new(tokens)
2121+
.build()
2122+
.unwrap()
2123+
.parse_statements()
2124+
.unwrap();
2125+
assert_eq!(statements.len(), 1);
2126+
}
2127+
20712128
fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
20722129
let expr = DFParser::parse_sql_into_expr(sql).unwrap();
20732130
assert_eq!(expr, expected, "actual:\n{expr:#?}");

0 commit comments

Comments
 (0)