@@ -15,6 +15,7 @@ internal static class SqlProcessor
1515 private const char SanitizationPlaceholder = '?' ;
1616 private const char SpaceChar = ' ' ;
1717 private const char CommaChar = ',' ;
18+ private const char OpenSquareBracketChar = '[' ;
1819 private const char OpenParenChar = '(' ;
1920 private const char CloseParenChar = ')' ;
2021 private const char DashChar = '-' ;
@@ -35,6 +36,13 @@ internal static class SqlProcessor
3536 private static readonly SearchValues < char > WhitespaceSearchValues = SearchValues . Create ( WhitespaceChars ) ;
3637#endif
3738
39+ // This is not an exhaustive list but covers the majority of common reserved SQL keywords that may follow a FROM clause.
40+ // This is used when determining if the previous token is a keyword in order to identify the end of a comma separated FROM clause.
41+ // NOTE: These are ordered so that more likely keywords appear first to shorten the comparison loop.
42+ private static readonly string [ ] FromClauseReservedKeywords = [
43+ "WHERE" , "BY" , "AS" , "JOIN" , "WITH" , "CROSS" , "HAVING" , "WINDOW" , "LIMIT" , "OFFSET" , "TABLESAMPLE" , "PIVOT" , "UNPIVOT"
44+ ] ;
45+
3846 // We can extend this in the future to include more keywords if needed.
3947 // The keywords should be ordered by frequency of use to optimize performance.
4048 // This only includes keywords that may be the first keyword in a statement.
@@ -148,9 +156,9 @@ private static bool IsAsciiDigit(char c) =>
148156 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
149157 private static bool IsAsciiIdentifierChar ( char c ) =>
150158#if NET
151- char . IsAsciiLetter ( c ) || char . IsAsciiDigit ( c ) || c == UnderscoreChar || c == DotChar ;
159+ char . IsLetter ( c ) || char . IsAsciiDigit ( c ) || c == UnderscoreChar || c == DotChar ;
152160#else
153- IsAsciiLetter ( c ) || IsAsciiDigit ( c ) || c == UnderscoreChar || c == DotChar ;
161+ char . IsLetter ( c ) || IsAsciiDigit ( c ) || c == UnderscoreChar || c == DotChar ;
154162#endif
155163
156164 private static SqlStatementInfo SanitizeSql ( ReadOnlySpan < char > sql )
@@ -371,6 +379,7 @@ private static void ParseNextToken(
371379 state . ParsePosition += keywordLength ;
372380 state . PreviousTokenStartPosition = start ;
373381 state . PreviousTokenEndPosition = start + keywordLength ;
382+ state . InFromClause = potentialKeywordInfo . SqlKeyword == SqlKeyword . From ;
374383
375384 // No further parsing needed for this token
376385 return ;
@@ -401,14 +410,31 @@ private static void ParseNextToken(
401410 var length = i - start ;
402411 if ( length > 0 )
403412 {
413+ var tokenSpan = sql . Slice ( start , length ) ;
414+
415+ // Special handling: if we are in a FROM clause, check if this identifier is a reserved keyword
416+ // that indicates the end of the FROM clause.
417+ if ( state . InFromClause )
418+ {
419+ foreach ( var reservedKeyword in FromClauseReservedKeywords )
420+ {
421+ if ( IsCaseInsensitiveMatch ( tokenSpan , reservedKeyword ) )
422+ {
423+ // We've matched a reserved keyword so are no longer in the FROM clause.
424+ state . InFromClause = false ;
425+ break ;
426+ }
427+ }
428+ }
429+
404430 // Copy to sanitized buffer.
405- sql . Slice ( start , length ) . CopyTo ( buffer . Slice ( state . SanitizedPosition ) ) ;
431+ tokenSpan . CopyTo ( buffer . Slice ( state . SanitizedPosition ) ) ;
406432 state . SanitizedPosition += length ;
407433
408434 // Optionally copy to summary buffer.
409435 if ( state . CaptureNextTokenInSummary )
410436 {
411- sql . Slice ( start , length ) . CopyTo ( state . SummaryBuffer . Slice ( state . SummaryPosition ) ) ;
437+ tokenSpan . CopyTo ( state . SummaryBuffer . Slice ( state . SummaryPosition ) ) ;
412438 state . SummaryPosition += length ;
413439
414440 // Add a space after the identifier. The trailing space will be trimmed later.
@@ -423,15 +449,35 @@ private static void ParseNextToken(
423449 }
424450 else
425451 {
426- // Special case: if the token is a comma and follows a FROM keyword,
427- // we are in an old style implicit join and we want to capture the next valid identifier in the summary .
428- var prevKeyword = state . PreviousParsedKeyword ? . SqlKeyword ?? SqlKeyword . Unknown ;
429- state . CaptureNextTokenInSummary = prevKeyword == SqlKeyword . From && currentChar == CommaChar ;
452+ // If we are in a FROM clause, we want to capture the next identifier following a comma or open square bracket.
453+ // Commas may occur when listing multiple tables in a FROM clause .
454+ // Brackets may occur when using schema-qualified or delimited identifiers.
455+ state . CaptureNextTokenInSummary = state . InFromClause && ( currentChar == CommaChar || currentChar == OpenSquareBracketChar ) ;
430456
431457 buffer [ state . SanitizedPosition ++ ] = currentChar ;
432458 state . ParsePosition ++ ;
433459
434- // We don't update previous token start/end positions for single-char tokens.
460+ // NOTE: We don't update previous token start/end positions for single-char tokens.
461+ }
462+
463+ static bool IsCaseInsensitiveMatch ( ReadOnlySpan < char > tokenSpan , string reservedKeyword )
464+ {
465+ if ( tokenSpan . Length != reservedKeyword . Length )
466+ {
467+ return false ;
468+ }
469+
470+ var reservedKeywordSpan = reservedKeyword . AsSpan ( ) ;
471+
472+ for ( var charPos = 0 ; charPos < tokenSpan . Length ; charPos ++ )
473+ {
474+ if ( ( tokenSpan [ charPos ] | 0x20 ) != ( reservedKeywordSpan [ charPos ] | 0x20 ) )
475+ {
476+ return false ;
477+ }
478+ }
479+
480+ return true ;
435481 }
436482 }
437483
@@ -724,6 +770,10 @@ private ref struct ParseState
724770 // Stored in state to avoid slicing repeatedly.
725771 public Span < char > SummaryBuffer ;
726772
773+ /// <summary>
774+ /// Will be set if a keyword has been matched by the parser.
775+ /// Not all keywords are necessarily matched.
776+ /// </summary>
727777 public SqlKeywordInfo ? PreviousParsedKeyword ; // 8 bytes (reference type)
728778
729779 public SqlKeyword FirstSummaryKeyword ; // 4 bytes (enum, underlying int)
@@ -739,7 +789,16 @@ private ref struct ParseState
739789 public int PreviousTokenStartPosition ; // 4 bytes
740790 public int PreviousTokenEndPosition ; // 4 bytes
741791
792+ // NOTE: If the number of bool fields increases significantly, consider combining into a bitfield.
793+
742794 public bool CaptureNextTokenInSummary ; // 1 byte
795+
796+ /// <summary>
797+ /// Used to track if we are in a FROM clause for special handling of comma-separated table lists.
798+ /// When set to <c>true</c>, subsequent unmatched tokens will be compared against reserved keywords.
799+ /// As soon as we match a reserved keyword, we exit the FROM clause state.
800+ /// </summary>
801+ public bool InFromClause ; // 1 byte
743802 }
744803
745804 private sealed class SqlKeywordInfo
0 commit comments