Skip to content

Commit 710d341

Browse files
authored
Add remaining path expressions to parser (#124)
* Add remaining path navigation to parser This PR includes changes for enabling Parser to parse Path Navigations as specified in Section 4 of PartiQL Specification. It also ensures the following tests go through: https://github.com/partiql/partiql-tests/blob/c76715ddb95ba6eec2d34ca500077b6e7eba9e35/partiql-test-data/pass/parser/primitives/path-expression.ion The following PR in `partiql-tests` is created for adding additional conformance tests for path expressions: partiql/partiql-tests#13 In addition: - the PR introduces `dbl_quoted` property to `SymbolPrimitive` in order to cater for symbol primitives that are double quoted, e.g. `AS "date"`. - Splits `Identifier` to `QuotedIdentifier` and `UnquotedIdentifier` in order for the parser to populate the AST with the correct CaseSensitivity.
1 parent 5acb466 commit 710d341

File tree

5 files changed

+337
-141
lines changed

5 files changed

+337
-141
lines changed

partiql-ast/src/ast.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@ pub trait ToAstNode: Sized {
1919
/// further [AstNode] construction.
2020
/// ## Example:
2121
/// ```
22+
/// use partiql_ast::ast;
2223
/// use partiql_ast::ast::{SymbolPrimitive, ToAstNode};
24+
/// use partiql_ast::ast::CaseSensitivity::CaseInsensitive;
2325
/// use partiql_source_map::location::{ByteOffset, BytePosition, Location, ToLocated};
2426
///
2527
/// let p = SymbolPrimitive {
26-
/// value: "symbol2".to_string()
28+
/// value: "symbol2".to_string(),
29+
/// case: Some(ast::CaseSensitivity::CaseInsensitive)
2730
/// };
2831
///
2932
/// let node = p
@@ -576,7 +579,6 @@ pub enum PathStep {
576579
#[derive(Clone, Debug, PartialEq)]
577580
pub struct PathExpr {
578581
pub index: Box<Expr>,
579-
pub case: CaseSensitivity,
580582
}
581583

582584
/// Is used to determine if variable lookup should be case-sensitive or not.
@@ -871,6 +873,8 @@ pub struct CustomType {
871873
#[derive(Clone, Debug, PartialEq)]
872874
pub struct SymbolPrimitive {
873875
pub value: String,
876+
// Optional because string literal symbols don't have case sensitivity
877+
pub case: Option<CaseSensitivity>,
874878
}
875879

876880
#[derive(Clone, Debug, PartialEq)]

partiql-ast/tests/test_ast.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ fn test_ast_init() {
2222

2323
let span_only = ast::SymbolPrimitive {
2424
value: "symbol1".to_string(),
25+
case: Some(CaseSensitivity::CaseInsensitive),
2526
}
2627
.to_node()
2728
.location(Location {

partiql-parser/src/lexer.rs

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -462,17 +462,21 @@ pub enum Token<'input> {
462462

463463
// unquoted identifiers
464464
#[regex("[a-zA-Z_$][a-zA-Z0-9_$]*", |lex| lex.slice())]
465+
UnquotedIdent(&'input str),
466+
465467
// quoted identifiers (quoted with double quotes)
466468
#[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#,
467469
|lex| lex.slice().trim_matches('"'))]
468-
Identifier(&'input str),
470+
QuotedIdent(&'input str),
469471

470472
// unquoted @identifiers
471473
#[regex("@[a-zA-Z_$][a-zA-Z0-9_$]*", |lex| &lex.slice()[1..])]
474+
UnquotedAtIdentifier(&'input str),
475+
472476
// quoted @identifiers (quoted with double quotes)
473477
#[regex(r#"@"([^"\\]|\\t|\\u|\\n|\\")*""#,
474478
|lex| lex.slice()[1..].trim_matches('"'))]
475-
AtIdentifier(&'input str),
479+
QuotedAtIdentifier(&'input str),
476480

477481
#[regex("[0-9]+", |lex| lex.slice())]
478482
Int(&'input str),
@@ -645,8 +649,10 @@ impl<'input> fmt::Display for Token<'input> {
645649
Token::Caret => write!(f, "^"),
646650
Token::Period => write!(f, "."),
647651
Token::DblPipe => write!(f, "||"),
648-
Token::Identifier(id) => write!(f, "<{}:IDENT>", id),
649-
Token::AtIdentifier(id) => write!(f, "<{}:@IDENT>", id),
652+
Token::UnquotedIdent(id) => write!(f, "<{}:UNQUOTED_IDENT>", id),
653+
Token::QuotedIdent(id) => write!(f, "<{}:QUOTED_IDENT>", id),
654+
Token::UnquotedAtIdentifier(id) => write!(f, "<{}:UNQUOTED_ATIDENT>", id),
655+
Token::QuotedAtIdentifier(id) => write!(f, "<{}:QUOTED_ATIDENT>", id),
650656
Token::Int(txt) => write!(f, "<{}:INT>", txt),
651657
Token::ExpReal(txt) => write!(f, "<{}:REAL>", txt),
652658
Token::Real(txt) => write!(f, "<{}:REAL>", txt),
@@ -730,7 +736,7 @@ mod tests {
730736
fn display() -> Result<(), ParseError<'static, BytePosition>> {
731737
let symbols =
732738
"( [ { } ] ) << >> ; , < > <= >= != <> = == - + * % / ^ . || : --foo /*block*/";
733-
let primitives = "ident @atident";
739+
let primitives = r#"unquoted_ident "quoted_ident" @unquoted_atident @"quoted_atident""#;
734740
let keywords =
735741
"WiTH Where Value uSiNg Unpivot UNION True Select right Preserve pivoT Outer Order Or \
736742
On Offset Nulls Null Not Natural Missing Limit Like Left Lateral Last Join \
@@ -753,10 +759,11 @@ mod tests {
753759
"PIVOT", ">", "OUTER", "<=", "ORDER", ">=", "OR", "!=", "ON", "<>", "OFFSET",
754760
"=", "NULLS", "==", "NULL", "-", "NOT", "+", "NATURAL", "*", "MISSING", "%",
755761
"LIMIT", "/", "LIKE", "^", "LEFT", ".", "LATERAL", "||", "LAST", ":", "JOIN",
756-
"--", "INTERSECT", "/**/", "IS", "<ident:IDENT>", "INNER", "<atident:@IDENT>", "IN",
757-
"HAVING", "GROUP", "FROM", "FULL", "FIRST", "FALSE", "EXCEPT", "ESCAPE", "DESC",
758-
"CROSS", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES", "CASE", "WHEN",
759-
"THEN", "ELSE", "END",
762+
"--", "INTERSECT", "/**/","IS", "<unquoted_ident:UNQUOTED_IDENT>", "INNER",
763+
"<quoted_ident:QUOTED_IDENT>", "IN", "<unquoted_atident:UNQUOTED_ATIDENT>", "HAVING",
764+
"<quoted_atident:QUOTED_ATIDENT>", "GROUP", "FROM", "FULL", "FIRST", "FALSE", "EXCEPT",
765+
"ESCAPE", "DESC", "CROSS", "BY", "BETWEEN", "AT", "AS", "AND", "ASC", "ALL", "VALUES",
766+
"CASE", "WHEN", "THEN", "ELSE", "END",
760767
];
761768
let displayed = toks
762769
.into_iter()
@@ -840,20 +847,22 @@ mod tests {
840847

841848
#[test]
842849
fn select() -> Result<(), ParseError<'static, BytePosition>> {
843-
let query = "SELECT g\nFROM data\nGROUP BY a";
850+
let query = r#"SELECT g
851+
FROM "data"
852+
GROUP BY a"#;
844853
let mut offset_tracker = LineOffsetTracker::default();
845854
let lexer = PartiqlLexer::new(query, &mut offset_tracker);
846855
let toks: Vec<_> = lexer.collect::<Result<_, _>>()?;
847856

848857
assert_eq!(
849858
vec![
850859
Token::Select,
851-
Token::Identifier("g"),
860+
Token::UnquotedIdent("g"),
852861
Token::From,
853-
Token::Identifier("data"),
862+
Token::QuotedIdent("data"),
854863
Token::Group,
855864
Token::By,
856-
Token::Identifier("a")
865+
Token::UnquotedIdent("a")
857866
],
858867
toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
859868
);
@@ -873,7 +882,7 @@ mod tests {
873882
);
874883
assert_eq!(
875884
LineAndColumn::from(offset_tracker.at(query, 19.into()).unwrap()),
876-
LineAndColumn::new(3, 1).unwrap()
885+
LineAndColumn::new(2, 11).unwrap()
877886
);
878887

879888
let offset_r_a = query.rfind('a').unwrap();
@@ -900,12 +909,12 @@ mod tests {
900909
assert_eq!(
901910
vec![
902911
Token::Select,
903-
Token::Identifier("🐈"),
912+
Token::QuotedIdent("🐈"),
904913
Token::From,
905-
Token::Identifier("❤ℝ"),
914+
Token::QuotedIdent("❤ℝ"),
906915
Token::Group,
907916
Token::By,
908-
Token::Identifier("🧸")
917+
Token::QuotedIdent("🧸")
909918
],
910919
toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
911920
);
@@ -981,9 +990,9 @@ mod tests {
981990
vec![
982991
Token::Select,
983992
Token::CommentLine("--comment"),
984-
Token::AtIdentifier("g"),
993+
Token::UnquotedAtIdentifier("g"),
985994
Token::From,
986-
Token::AtIdentifier("foo"),
995+
Token::QuotedAtIdentifier("foo"),
987996
],
988997
toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
989998
);
@@ -1002,7 +1011,7 @@ mod tests {
10021011
vec![
10031012
Token::Select,
10041013
Token::CommentBlock("/*comment*/"),
1005-
Token::Identifier("g"),
1014+
Token::UnquotedIdent("g"),
10061015
],
10071016
toks.into_iter().map(|(_s, t, _e)| t).collect::<Vec<_>>()
10081017
);

partiql-parser/src/parse/mod.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,66 @@ mod tests {
294294
}
295295
}
296296

297+
mod pathexpr {
298+
use super::*;
299+
300+
#[test]
301+
fn nested() {
302+
parse!(r#"a.b"#);
303+
parse!(r#"a.b.c['item']."d"[5].e['s'].f[1+2]"#);
304+
parse!(r#"a.b.*"#);
305+
parse!(r#"a.b[*]"#);
306+
parse!(r#"@a.b[*]"#);
307+
parse!(r#"@"a".b[*]"#);
308+
parse!(r#"tables.items[*].product.*.nest"#);
309+
}
310+
311+
#[test]
312+
fn tuple() {
313+
parse!(r#"{'a':1 , 'data': 2}.a"#);
314+
parse!(r#"{'a':1 , 'data': 2}.'a'"#);
315+
parse!(r#"{'A':1 , 'data': 2}."A""#);
316+
parse!(r#"{'A':1 , 'data': 2}['a']"#);
317+
parse!(r#"{'attr': 1, 'b':2}[v || w]"#);
318+
parse!(r#"{'a':1, 'b':2}.*"#);
319+
}
320+
321+
#[test]
322+
fn array() {
323+
parse!(r#"[1,2,3][0]"#);
324+
parse!(r#"[1,2,3][1 + 1]"#);
325+
parse!(r#"[1,2,3][*]"#);
326+
}
327+
328+
#[test]
329+
fn query() {
330+
parse!(r#"(SELECT a FROM table).a"#);
331+
parse!(r#"(SELECT a FROM table).'a'"#);
332+
parse!(r#"(SELECT a FROM table)."a""#);
333+
parse!(r#"(SELECT a FROM table)['a']"#);
334+
parse!(r#"(SELECT a FROM table).*"#);
335+
parse!(r#"(SELECT a FROM table)[*]"#);
336+
}
337+
338+
#[test]
339+
fn function_call() {
340+
parse!(r#"foo(x, y).a"#);
341+
parse!(r#"foo(x, y).*"#);
342+
parse!(r#"foo(x, y)[*]"#);
343+
parse!(r#"foo(x, y)[5]"#);
344+
parse!(r#"foo(x, y).a.*"#);
345+
}
346+
347+
#[test]
348+
#[should_panic]
349+
fn erroneous() {
350+
parse!(r#"a.b.['item']"#);
351+
parse!(r#"a.b.{'a': 1, 'b': 2}.a"#);
352+
parse!(r#"a.b.[1, 2, 3][2]"#);
353+
parse!(r#"a.b.[*]"#);
354+
}
355+
}
356+
297357
mod sfw {
298358
use super::*;
299359

0 commit comments

Comments
 (0)