@@ -16,23 +16,24 @@ module QuerySummary
1616 # tokens = [Token.new(:keyword, "SELECT"), Token.new(:identifier, "users")]
1717 # Parser.build_summary_from_tokens(tokens) # => "SELECT users"
1818 class Parser
# Parser states: normally parsing, or waiting for table/collection names.
PARSING_STATE = :parsing
EXPECT_COLLECTION_STATE = :expect_collection

# Operations that start queries and need table names.
MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze
# Operations on existing data; table names are expected to follow.
COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze
# Keywords that signal a table name is coming next.
TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze
# DDL operations that create, modify, or remove database objects.
TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze
# Object types that can follow a DDL operation.
TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
2728
2829 class << self
2930 def build_summary_from_tokens ( tokens )
3031 summary_parts = [ ]
31- state = DEFAULT_STATE
32- skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
32+ state = PARSING_STATE
33+ skip_until = 0 # Skip tokens we've already processed when looking ahead
3334
3435 tokens . each_with_index do |token , index |
35- next if index < skip_until # Skip already processed tokens
36+ next if index < skip_until
3637
3738 result = process_token ( token , tokens , index , state )
3839
@@ -59,11 +60,11 @@ def process_main_operation(token, tokens, index, current_state)
5960
6061 case upcased_value
6162 when *MAIN_OPERATIONS
62- add_to_summary ( token . value , DEFAULT_STATE , index + 1 )
63+ add_to_summary ( token . value , PARSING_STATE , index + 1 )
6364 when *COLLECTION_OPERATIONS
6465 add_to_summary ( token . value , EXPECT_COLLECTION_STATE , index + 1 )
6566 when *TRIGGER_COLLECTION
66- trigger_collection_mode ( index + 1 )
67+ expect_table_names_next ( index + 1 )
6768 when *TABLE_OPERATIONS
6869 handle_table_operation ( token , tokens , index )
6970 when 'UNION'
# Dispatch a token seen while we expect table/collection names: identifiers
# become table names, operators are consumed in place, anything else drops
# back to normal parsing.
def process_collection_token(token, tokens, index, state)
  upcased_value = token.value.upcase
  name_like = identifier_like?(token) ||
              (token.type == :keyword && can_be_table_name?(upcased_value))

  if name_like
    process_table_name_and_alias(token, tokens, index)
  elsif token.value == '(' || token.type == :operator
    handle_collection_operator(token, state, index)
  else
    return_to_normal_parsing(token, index)
  end
end
8990
# Emit the table name as a summary part, skipping any alias that follows
# ("users u" / "users AS u"). A trailing comma keeps us in collection mode
# so the rest of a table list is captured; otherwise we resume parsing.
def process_table_name_and_alias(token, tokens, index)
  skip_count = calculate_alias_skip(tokens, index)
  lookahead = tokens[index + 1 + skip_count]

  if lookahead&.value == ','
    new_state = EXPECT_COLLECTION_STATE
    skip_count += 1 # consume the comma too
  else
    new_state = PARSING_STATE
  end

  { processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
end
# Operators and parentheses inside a table list add nothing to the summary;
# consume the token and stay in the current state.
def handle_collection_operator(_token, state, index)
  { processed: true, parts: [], new_state: state, next_index: index + 1 }
end
101104
# Any other token ends collection mode: emit nothing and resume normal parsing.
def return_to_normal_parsing(_token, index)
  { processed: true, parts: [], new_state: PARSING_STATE, next_index: index + 1 }
end
105108
# True when the token's type can stand in for a table/collection name.
def identifier_like?(token)
  case token.type
  when :identifier, :quoted_identifier, :string then true
  else false
  end
end
109112
# Object-type keywords (TABLE, INDEX, ...) may appear where a name is
# expected after a DDL operation; expects an already-upcased value.
def can_be_table_name?(upcased_value)
  TABLE_OBJECTS.include?(upcased_value)
end
114117
# Number of tokens to skip past an alias after a table name:
# 2 for "table AS alias", 1 for a bare "table alias", 0 otherwise.
def calculate_alias_skip(tokens, index)
  following = tokens[index + 1]
  return 0 unless following

  if following.value&.upcase == 'AS'
    2 # 'AS' plus the alias itself
  elsif following.type == :identifier
    1 # bare alias
  else
    0
  end
end
# Build the standard result hash: record one summary part, move to
# new_state, and continue from next_index.
def add_to_summary(part, new_state, next_index)
  { processed: true, parts: [part], new_state: new_state, next_index: next_index }
end
129133
# Switch into collection mode so upcoming identifiers are read as table names;
# the trigger keyword itself adds nothing to the summary.
def expect_table_names_next(next_index)
  { processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
end
133137
@@ -138,21 +142,22 @@ def not_processed(current_state, next_index)
# Merge "UNION ALL" into one summary part when 'ALL' follows; a lone UNION
# is recorded by itself. Either way we return to normal parsing.
def handle_union(token, tokens, index)
  following = tokens[index + 1]
  return add_to_summary(token.value, PARSING_STATE, index + 1) unless following && following.value&.upcase == 'ALL'

  { processed: true, parts: ["#{token.value} #{following.value}"], new_state: PARSING_STATE, next_index: index + 2 }
end
146150
# Combine a DDL operation with the object type that follows it into one
# summary part ("CREATE TABLE", "DROP INDEX", ...) and expect the object
# name next. A DDL keyword with no recognized object type is recorded alone.
#
# Fix: the object types were duplicated here as string literals; splat the
# existing TABLE_OBJECTS constant instead (same list used by
# can_be_table_name?), keeping a single source of truth.
def handle_table_operation(token, tokens, index)
  next_token_obj = tokens[index + 1]
  next_token = next_token_obj&.value&.upcase

  case next_token
  when *TABLE_OBJECTS
    { processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
  else
    add_to_summary(token.value, PARSING_STATE, index + 1)
  end
end
158163 end
0 commit comments