@@ -186,6 +186,15 @@ impl std::error::Error for ParserError {}
186186// By default, allow expressions up to this deep before erroring
187187const DEFAULT_REMAINING_DEPTH: usize = 50;
188188
189+ // A constant EOF token that can be referenced.
190+ const EOF_TOKEN: TokenWithSpan = TokenWithSpan {
191+ token: Token::EOF,
192+ span: Span {
193+ start: Location { line: 0, column: 0 },
194+ end: Location { line: 0, column: 0 },
195+ },
196+ };
197+
189198/// Composite types declarations using angle brackets syntax can be arbitrary
190199/// nested such that the following declaration is possible:
191200/// `ARRAY<ARRAY<INT>>`
@@ -1236,7 +1245,7 @@ impl<'a> Parser<'a> {
12361245 // Note also that naively `SELECT date` looks like a syntax error because the `date` type
12371246 // name is not followed by a string literal, but in fact in PostgreSQL it is a valid
12381247 // expression that should parse as the column name "date".
1239- let loc = self.peek_token ().span.start;
1248+        let loc = self.peek_token_ref().span.start;
12401249 let opt_expr = self.maybe_parse(|parser| {
12411250 match parser.parse_data_type()? {
12421251 DataType::Interval => parser.parse_interval(),
@@ -1259,8 +1268,14 @@ impl<'a> Parser<'a> {
12591268 return Ok(expr);
12601269 }
12611270
1262- let next_token = self.next_token();
1263- let expr = match next_token.token {
1271+ // Cache some dialect properties to avoid lifetime issues with the
1272+ // next_token reference.
1273+
1274+ let dialect = self.dialect;
1275+
1276+ let next_token = self.next_token_ref();
1277+ let span = next_token.span;
1278+ let expr = match &next_token.token {
12641279 Token::Word(w) => {
12651280 // The word we consumed may fall into one of two cases: it has a special meaning, or not.
12661281 // For example, in Snowflake, the word `interval` may have two meanings depending on the context:
@@ -1270,14 +1285,13 @@ impl<'a> Parser<'a> {
12701285 //
12711286 // We first try to parse the word and following tokens as a special expression, and if that fails,
12721287 // we rollback and try to parse it as an identifier.
1273- match self.try_parse(|parser| {
1274- parser.parse_expr_prefix_by_reserved_word(&w, next_token.span)
1275- }) {
1288+ let w = w.clone();
1289+ match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
12761290 // This word indicated an expression prefix and parsing was successful
12771291 Ok(Some(expr)) => Ok(expr),
12781292
12791293 // No expression prefix associated with this word
1280- Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token. span)?),
1294+ Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, span)?),
12811295
12821296 // If parsing of the word as a special expression failed, we are facing two options:
12831297 // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`)
@@ -1288,7 +1302,7 @@ impl<'a> Parser<'a> {
12881302 Err(e) => {
12891303 if !self.dialect.is_reserved_for_identifier(w.keyword) {
12901304 if let Ok(Some(expr)) = self.maybe_parse(|parser| {
1291- parser.parse_expr_prefix_by_unreserved_word(&w, next_token. span)
1305+ parser.parse_expr_prefix_by_unreserved_word(&w, span)
12921306 }) {
12931307 return Ok(expr);
12941308 }
@@ -1300,7 +1314,7 @@ impl<'a> Parser<'a> {
13001314 // array `[1, 2, 3]`
13011315 Token::LBracket => self.parse_array_expr(false),
13021316 tok @ Token::Minus | tok @ Token::Plus => {
1303- let op = if tok == Token::Plus {
1317+                let op = if *tok == Token::Plus {
13041318 UnaryOperator::Plus
13051319 } else {
13061320 UnaryOperator::Minus
@@ -1312,20 +1326,16 @@ impl<'a> Parser<'a> {
13121326 ),
13131327 })
13141328 }
1315- Token::ExclamationMark if self.dialect.supports_bang_not_operator() => {
1316- Ok(Expr::UnaryOp {
1317- op: UnaryOperator::BangNot,
1318- expr: Box::new(
1319- self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?,
1320- ),
1321- })
1322- }
1329+ Token::ExclamationMark if dialect.supports_bang_not_operator() => Ok(Expr::UnaryOp {
1330+ op: UnaryOperator::BangNot,
1331+ expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?),
1332+ }),
13231333 tok @ Token::DoubleExclamationMark
13241334 | tok @ Token::PGSquareRoot
13251335 | tok @ Token::PGCubeRoot
13261336 | tok @ Token::AtSign
13271337 | tok @ Token::Tilde
1328- if dialect_of!(self is PostgreSqlDialect) =>
1338+ if dialect_is!(dialect is PostgreSqlDialect) =>
13291339 {
13301340 let op = match tok {
13311341 Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial,
@@ -1342,7 +1352,7 @@ impl<'a> Parser<'a> {
13421352 ),
13431353 })
13441354 }
1345- Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
1355+ Token::EscapedStringLiteral(_) if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) =>
13461356 {
13471357 self.prev_token();
13481358 Ok(Expr::Value(self.parse_value()?))
@@ -1408,11 +1418,11 @@ impl<'a> Parser<'a> {
14081418 self.prev_token();
14091419 Ok(Expr::Value(self.parse_value()?))
14101420 }
1411- Token::LBrace if self. dialect.supports_dictionary_syntax() => {
1421+ Token::LBrace if dialect.supports_dictionary_syntax() => {
14121422 self.prev_token();
14131423 self.parse_duckdb_struct_literal()
14141424 }
1415- _ => self.expected ("an expression", next_token ),
1425+            _ => self.expected_current("an expression"),
14161426 }?;
14171427
14181428 let expr = self.try_parse_method(expr)?;
@@ -3273,11 +3283,17 @@ impl<'a> Parser<'a> {
32733283 }
32743284
32753285 /// Return the first non-whitespace token that has not yet been processed
3276- /// ( or None if reached end-of-file)
3286+ /// or Token::EOF
32773287 pub fn peek_token(&self) -> TokenWithSpan {
32783288 self.peek_nth_token(0)
32793289 }
32803290
3291+ /// Return a reference to the first non-whitespace token that has not yet
3292+ /// been processed or Token::EOF
3293+ pub fn peek_token_ref(&self) -> &TokenWithSpan {
3294+ self.peek_nth_token_ref(0)
3295+ }
3296+
32813297 /// Returns the `N` next non-whitespace tokens that have not yet been
32823298 /// processed.
32833299 ///
@@ -3329,7 +3345,12 @@ impl<'a> Parser<'a> {
33293345 }
33303346
33313347 /// Return nth non-whitespace token that has not yet been processed
3332- pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan {
3348+ pub fn peek_nth_token(&self, n: usize) -> TokenWithSpan {
3349+ self.peek_nth_token_ref(n).clone()
3350+ }
3351+
3352+ /// Return nth non-whitespace token that has not yet been processed
3353+ pub fn peek_nth_token_ref(&self, mut n: usize) -> &TokenWithSpan {
33333354 let mut index = self.index;
33343355 loop {
33353356 index += 1;
@@ -3340,10 +3361,7 @@ impl<'a> Parser<'a> {
33403361 }) => continue,
33413362 non_whitespace => {
33423363 if n == 0 {
3343- return non_whitespace.cloned().unwrap_or(TokenWithSpan {
3344- token: Token::EOF,
3345- span: Span::empty(),
3346- });
3364+ return non_whitespace.unwrap_or(&EOF_TOKEN);
33473365 }
33483366 n -= 1;
33493367 }
@@ -3376,22 +3394,22 @@ impl<'a> Parser<'a> {
33763394 matched
33773395 }
33783396
3397+ pub fn next_token(&mut self) -> TokenWithSpan {
3398+ self.next_token_ref().clone()
3399+ }
3400+
33793401 /// Return the first non-whitespace token that has not yet been processed
33803402 /// (or None if reached end-of-file) and mark it as processed. OK to call
33813403 /// repeatedly after reaching EOF.
3382- pub fn next_token (&mut self) -> TokenWithSpan {
3404+ pub fn next_token_ref (&mut self) -> & TokenWithSpan {
33833405 loop {
33843406 self.index += 1;
33853407 match self.tokens.get(self.index - 1) {
33863408 Some(TokenWithSpan {
33873409 token: Token::Whitespace(_),
33883410 span: _,
33893411 }) => continue,
3390- token => {
3391- return token
3392- .cloned()
3393- .unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF))
3394- }
3412+ token => return token.unwrap_or(&EOF_TOKEN),
33953413 }
33963414 }
33973415 }
@@ -3428,6 +3446,15 @@ impl<'a> Parser<'a> {
34283446 )
34293447 }
34303448
3449+ /// Report that the current token was found instead of `expected`.
3450+ pub fn expected_current<T>(&self, expected: &str) -> Result<T, ParserError> {
3451+ let found = self.tokens.get(self.index).unwrap_or(&EOF_TOKEN);
3452+ parser_err!(
3453+ format!("Expected: {expected}, found: {found}"),
3454+ found.span.start
3455+ )
3456+ }
3457+
34313458 /// If the current token is the `expected` keyword, consume it and returns
34323459 /// true. Otherwise, no tokens are consumed and returns false.
34333460 #[must_use]
0 commit comments