Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,15 @@ pub trait Dialect: Debug + Any {
true
}

/// Determine whether a nested quoted identifier starts inside the quoted
/// token being read, and return its opening quote character if so.
///
/// `_quote_start` is the opening quote character of the outer token and
/// `_chars` is a lookahead copy of the character stream.
///
/// The default implementation never reports a nested quote and returns
/// `None`; dialects that support nested quoting (for example `["a"]`)
/// override this method.
fn nested_quote_start(
    &self,
    _quote_start: char,
    _chars: Peekable<Chars<'_>>,
) -> Option<char> {
    None
}

/// Determine if a character is a valid start character for an unquoted identifier
fn is_identifier_start(&self, ch: char) -> bool;

Expand Down
36 changes: 35 additions & 1 deletion src/dialect/redshift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,48 @@ impl Dialect for RedshiftSqlDialect {
/// treating them as json path. If there is identifier then we assume
/// there is no json path.
fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
// PartiQL (used as json path query language in Redshift) uses square bracket as
// a start character and a quote is a beginning of quoted identifier.
// Skipping analyzing token such as `"a"` and analyze only token that
// can be part of json path potentially.
// For ex., `[0]` (seems part of json path) or `["a"]` (normal quoted identifier)
if let Some(quote_start) = chars.peek() {
if *quote_start == '"' {
return true;
}
};
chars.next();
let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable();
if let Some(&ch) = not_white_chars.peek() {
return self.is_identifier_start(ch);
// PartiQL uses single quote as starting identification inside a quote
// It is a normal identifier if it has no single quote at the beginning.
// Square bracket can contain quoted identifier.
// For ex., `["a"]`, but this is not a part of json path, and it is a normal quoted identifier.
return ch == '"' || self.is_identifier_start(ch);
}
false
}

/// Redshift supports a quoted identifier nested inside square brackets,
/// such as `["a"]`.
///
/// Returns `Some('"')` when `quote_start` is `[` and the first
/// non-whitespace character after it is `"`; returns `None` otherwise.
fn nested_quote_start(
    &self,
    quote_start: char,
    mut chars: Peekable<Chars<'_>>,
) -> Option<char> {
    match quote_start {
        '[' => {
            // Step over the opening bracket before looking ahead.
            chars.next();

            // The first non-whitespace character decides whether a nested
            // quoted identifier starts here.
            chars
                .find(|ch| !ch.is_whitespace())
                .filter(|&ch| ch == '"')
        }
        _ => None,
    }
}

fn is_identifier_start(&self, ch: char) -> bool {
// Extends Postgres dialect with sharp
PostgreSqlDialect {}.is_identifier_start(ch) || ch == '#'
Expand Down
58 changes: 47 additions & 11 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1081,19 +1081,34 @@ impl<'a> Tokenizer<'a> {
.dialect
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
{
let error_loc = chars.location();
chars.next(); // consume the opening quote
let quote_end = Word::matching_end_quote(quote_start);
let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
let word = if let Some(nested_quote_start) = self
.dialect
.nested_quote_start(quote_start, chars.peekable.clone())
{
chars.next(); // consume the opening quote
Copy link
Contributor Author

@7phs 7phs Dec 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is Redshift-specific (nested quotation). At the same time, tokenization of a quoted identifier is a method of the tokenizer, which is why it is implemented here.

Let me know if you have a good idea for how to make this code more explicit.


let quote_end = Word::matching_end_quote(quote_start);
let error_loc = chars.location();

peeking_take_while(chars, |ch| ch.is_whitespace());
let nested_word =
self.tokenize_quoted_identifier(nested_quote_start, chars)?;
peeking_take_while(chars, |ch| ch.is_whitespace());

if chars.peek() != Some(&quote_end) {
return self.tokenizer_error(
error_loc,
format!("Expected close delimiter '{quote_end}' before EOF."),
);
}

chars.next(); // consume the closing nested quote

if last_char == Some(quote_end) {
Ok(Some(Token::make_word(&s, Some(quote_start))))
format!("{nested_quote_start}{nested_word}{nested_quote_start}")
} else {
self.tokenizer_error(
error_loc,
format!("Expected close delimiter '{quote_end}' before EOF."),
)
}
self.tokenize_quoted_identifier(quote_start, chars)?
};
Ok(Some(Token::make_word(&word, Some(quote_start))))
}
// numbers and period
'0'..='9' | '.' => {
Expand Down Expand Up @@ -1597,6 +1612,27 @@ impl<'a> Tokenizer<'a> {
s
}

/// Read a quoted identifier, starting at its opening quote.
///
/// Consumes the opening `quote_start` character, then reads up to the
/// matching closing delimiter. Returns the text read between the two
/// delimiters, or a tokenizer error located at the opening quote when the
/// closing delimiter is not found.
fn tokenize_quoted_identifier(
    &self,
    quote_start: char,
    chars: &mut State,
) -> Result<String, TokenizerError> {
    // Remember where the identifier began so that an error points at the
    // opening quote and not at the end of the input.
    let opening_loc = chars.location();
    chars.next(); // consume the opening quote

    let quote_end = Word::matching_end_quote(quote_start);
    let (ident, terminator) = self.parse_quoted_ident(chars, quote_end);

    if terminator != Some(quote_end) {
        return self.tokenizer_error(
            opening_loc,
            format!("Expected close delimiter '{quote_end}' before EOF."),
        );
    }

    Ok(ident)
}

/// Read a single quoted string, starting with the opening quote.
fn tokenize_escaped_single_quoted_string(
&self,
Expand Down
42 changes: 42 additions & 0 deletions tests/sqlparser_redshift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,31 @@ fn test_redshift_json_path() {
},
expr_from_projection(only(&select.projection))
);

let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM customer_orders_lineitem"#;
let select = dialects.verified_only_select(sql);
assert_eq!(
&Expr::JsonAccess {
value: Box::new(Expr::CompoundIdentifier(vec![
Ident::new("db1"),
Ident::new("sc1"),
Ident::new("tbl1"),
Ident::new("col1")
])),
path: JsonPath {
path: vec![
JsonPathElem::Bracket {
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
},
JsonPathElem::Dot {
key: "id".to_string(),
quoted: true,
}
]
}
},
expr_from_projection(only(&select.projection))
);
}

#[test]
Expand Down Expand Up @@ -353,3 +378,20 @@ fn test_parse_json_path_from() {
_ => panic!(),
}
}

#[test]
fn test_parse_select_numbered_columns() {
    // Plain double-quoted numeric alias, checked with the Redshift and generic dialects.
    redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);

    // Redshift-specific case: a quoted identifier inside square brackets,
    // including inner names that themselves contain bracket characters.
    let bracketed_aliases = [
        r#"SELECT 1 AS ["1"] FROM a"#,
        r#"SELECT 1 AS ["[="] FROM a"#,
        r#"SELECT 1 AS ["=]"] FROM a"#,
        r#"SELECT 1 AS ["a[b]"] FROM a"#,
    ];
    for sql in bracketed_aliases {
        redshift().verified_stmt(sql);
    }
}

#[test]
fn test_parse_create_numbered_columns() {
    // Quoted numeric column names mixed with an ordinary quoted column name.
    let sql =
        r#"CREATE TABLE test_table_1 ("1" INT, "d" VARCHAR(155), "2" DOUBLE PRECISION)"#;
    redshift_and_generic().verified_stmt(sql);
}
Loading