Skip to content

Commit 14d7ef4

Browse files
committed
add ability to customize tokens in parser
1 parent 05802e2 commit 14d7ef4

File tree

1 file changed

+71
-15
lines changed

1 file changed

+71
-15
lines changed

datafusion/sql/src/parser.rs

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -363,28 +363,48 @@ const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
363363
/// # Ok(())
364364
/// # }
365365
/// ```
366-
pub struct DFParserBuilder<'a> {
367-
/// The SQL string to parse
368-
sql: &'a str,
366+
pub struct DFParserBuilder<'a, 'b> {
367+
/// Parser input: either raw SQL or tokens
368+
input: ParserInput<'a>,
369369
/// The Dialect to use (defaults to [`GenericDialect`])
370-
dialect: &'a dyn Dialect,
370+
dialect: &'b dyn Dialect,
371371
/// The recursion limit while parsing
372372
recursion_limit: usize,
373373
}
374374

375-
impl<'a> DFParserBuilder<'a> {
375+
/// Input accepted by the parser builder: either raw SQL text or a list of
/// already-produced tokens.
376+
pub enum ParserInput<'a> {
377+
/// Raw SQL text. It is tokenized with the builder's dialect when the
/// parser is built.
378+
Sql(&'a str),
379+
/// Pre-built tokens. They are used as-is; the tokenizer is not run.
380+
Tokens(Vec<TokenWithSpan>),
381+
}
382+
383+
/// Lets a borrowed SQL string be passed wherever a [`ParserInput`] is
/// expected.
impl<'a> From<&'a str> for ParserInput<'a> {
384+
/// Wraps `sql` in [`ParserInput::Sql`] without copying it.
fn from(sql: &'a str) -> Self {
385+
Self::Sql(sql)
386+
}
387+
}
388+
389+
/// Lets an owned token vector be passed wherever a [`ParserInput`] is
/// expected. The resulting input borrows nothing, hence the `'static`
/// lifetime.
impl From<Vec<TokenWithSpan>> for ParserInput<'static> {
390+
/// Moves `tokens` into [`ParserInput::Tokens`].
fn from(tokens: Vec<TokenWithSpan>) -> Self {
391+
Self::Tokens(tokens)
392+
}
393+
}
394+
395+
impl<'a, 'b> DFParserBuilder<'a, 'b> {
376396
/// Create a new parser builder for the specified input (raw SQL or tokens) using the
377397
/// [`GenericDialect`].
378-
pub fn new(sql: &'a str) -> Self {
398+
pub fn new(input: impl Into<ParserInput<'a>>) -> Self {
379399
Self {
380-
sql,
400+
input: input.into(),
381401
dialect: &DEFAULT_DIALECT,
382402
recursion_limit: DEFAULT_RECURSION_LIMIT,
383403
}
384404
}
385405

386406
/// Adjust the parser builder's dialect. Defaults to [`GenericDialect`]
387-
pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self {
407+
pub fn with_dialect(mut self, dialect: &'b dyn Dialect) -> Self {
388408
self.dialect = dialect;
389409
self
390410
}
@@ -395,12 +415,18 @@ impl<'a> DFParserBuilder<'a> {
395415
self
396416
}
397417

398-
pub fn build(self) -> Result<DFParser<'a>, DataFusionError> {
399-
let mut tokenizer = Tokenizer::new(self.dialect, self.sql);
400-
// Convert TokenizerError -> ParserError
401-
let tokens = tokenizer
402-
.tokenize_with_location()
403-
.map_err(ParserError::from)?;
418+
/// Build the resulting parser, tokenizing the input first when it is raw SQL
419+
pub fn build(self) -> Result<DFParser<'b>, DataFusionError> {
420+
let tokens = match self.input {
421+
ParserInput::Tokens(tokens) => tokens,
422+
ParserInput::Sql(sql) => {
423+
let mut tokenizer = Tokenizer::new(self.dialect, sql);
424+
// Convert TokenizerError -> ParserError
425+
tokenizer
426+
.tokenize_with_location()
427+
.map_err(ParserError::from)?
428+
}
429+
};
404430

405431
Ok(DFParser {
406432
parser: Parser::new(self.dialect)
@@ -1162,7 +1188,7 @@ mod tests {
11621188
BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan,
11631189
};
11641190
use sqlparser::dialect::SnowflakeDialect;
1165-
use sqlparser::tokenizer::Span;
1191+
use sqlparser::tokenizer::{Location, Span, Whitespace};
11661192

11671193
fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> {
11681194
let statements = DFParser::parse_sql(sql)?;
@@ -2068,6 +2094,36 @@ mod tests {
20682094
);
20692095
}
20702096

2097+
// Checks that a parser can be built directly from a hand-written token
// vector instead of a SQL string.
#[test]
2098+
fn test_custom_tokens() {
2099+
// Placeholder span with zeroed start/end locations, reused for every
// token below.
2100+
let span = Span {
2101+
start: Location { line: 0, column: 0 },
2102+
end: Location { line: 0, column: 0 },
2103+
};
// Token sequence: the `SELECT` keyword, a single space, and a placeholder
// token carrying the text "1".
2104+
let tokens = vec![
2105+
TokenWithSpan {
2106+
token: Token::make_keyword("SELECT"),
2107+
span,
2108+
},
2109+
TokenWithSpan {
2110+
token: Token::Whitespace(Whitespace::Space),
2111+
span,
2112+
},
2113+
TokenWithSpan {
2114+
token: Token::Placeholder("1".to_string()),
2115+
span,
2116+
},
2117+
];
2118+
// Hand the tokens straight to the builder (no dialect override), then
// parse them; both steps must succeed.
2119+
let statements = DFParserBuilder::new(tokens)
2120+
.build()
2121+
.unwrap()
2122+
.parse_statements()
2123+
.unwrap();
// The token stream is expected to yield exactly one statement.
2124+
assert_eq!(statements.len(), 1);
2125+
}
20712127
fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) {
20722128
let expr = DFParser::parse_sql_into_expr(sql).unwrap();
20732129
assert_eq!(expr, expected, "actual:\n{expr:#?}");

0 commit comments

Comments
 (0)