@@ -16,9 +16,18 @@ module QuerySummary
1616 # tokens = [Token.new(:keyword, "SELECT"), Token.new(:identifier, "users")]
1717 # Parser.build_summary_from_tokens(tokens) # => "SELECT users"
1818 class Parser
# Parser states. build_summary_from_tokens starts in DEFAULT_STATE;
# EXPECT_COLLECTION_STATE is the state in which process_collection_token
# records the next identifier-like token.
DEFAULT_STATE = :default
EXPECT_COLLECTION_STATE = :expect_collection

# Keywords passed to add_to_summary with the state reset to DEFAULT_STATE.
MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze

# Keywords passed to add_to_summary with the state set to EXPECT_COLLECTION_STATE.
COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze

# Keywords that contribute no summary part but switch to EXPECT_COLLECTION_STATE.
TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze

# Keywords routed to handle_table_operation.
TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze

# Object keywords that may follow a table operation; as keyword tokens they are
# also accepted as collection names (see can_be_table_name?).
TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
27+
1928 def self . build_summary_from_tokens ( tokens )
2029 summary_parts = [ ]
21- state = :default # Either :default or :expect_collection
30+ state = DEFAULT_STATE
2231 skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
2332
2433 tokens . each_with_index do |token , index |
@@ -45,14 +54,16 @@ def self.process_token(token, tokens, index, state)
4554 end
4655
4756 def self . process_main_operation ( token , tokens , index , current_state )
48- case token . value . upcase
49- when 'SELECT' , 'INSERT' , 'DELETE'
50- add_to_summary ( token . value , :default , index + 1 )
51- when 'WITH' , 'UPDATE'
52- add_to_summary ( token . value , :expect_collection , index + 1 )
53- when 'FROM' , 'INTO' , 'JOIN' , 'IN'
57+ upcased_value = token . value . upcase
58+
59+ case upcased_value
60+ when *MAIN_OPERATIONS
61+ add_to_summary ( token . value , DEFAULT_STATE , index + 1 )
62+ when *COLLECTION_OPERATIONS
63+ add_to_summary ( token . value , EXPECT_COLLECTION_STATE , index + 1 )
64+ when *TRIGGER_COLLECTION
5465 trigger_collection_mode ( index + 1 )
55- when 'CREATE' , 'ALTER' , 'DROP' , 'TRUNCATE'
66+ when * TABLE_OPERATIONS
5667 handle_table_operation ( token , tokens , index )
5768 when 'UNION'
5869 handle_union ( token , tokens , index )
@@ -62,36 +73,49 @@ def self.process_main_operation(token, tokens, index, current_state)
6273 end
6374
# Handles +token+ while the parser is waiting for a collection name.
#
# @param token [#type, #value] the token at +index+
# @param tokens [Array] the full token list, used for look-ahead
# @param index [Integer] position of +token+ within +tokens+
# @param state [Symbol] the current parser state
# @return [Hash] result hash with :processed, :parts, :new_state and :next_index
def self.process_collection_token(token, tokens, index, state)
  # Outside collection mode this handler declines the token.
  return not_processed(state, index + 1) if state != EXPECT_COLLECTION_STATE

  upcased = token.value.upcase
  names_collection = identifier_like?(token) ||
                     (token.type == :keyword && can_be_table_name?(upcased))
  return handle_collection_identifier(token, tokens, index) if names_collection

  # An opening parenthesis or an operator keeps the current state.
  return handle_collection_operator(token, state, index) if token.value == '(' || token.type == :operator

  handle_collection_default(token, index)
end
8188
# Records +token+ as a collection name and skips any alias that follows it.
#
# If the token after the (optional) alias is a comma, the comma is consumed
# and the parser keeps expecting another collection; otherwise it returns to
# the default state.
#
# @return [Hash] result hash with :processed, :parts, :new_state and :next_index
def self.handle_collection_identifier(token, tokens, index)
  following = index + 1 + calculate_alias_skip(tokens, index)

  if tokens[following]&.value == ','
    # Step over the comma as well.
    { processed: true, parts: [token.value], new_state: EXPECT_COLLECTION_STATE, next_index: following + 1 }
  else
    { processed: true, parts: [token.value], new_state: DEFAULT_STATE, next_index: following }
  end
end
96+
# Consumes a token without adding a summary part and without changing state.
# +token+ is accepted but not read.
#
# @return [Hash] result hash that keeps +state+ and advances to +index+ + 1
def self.handle_collection_operator(token, state, index)
  {
    processed: true,
    parts: [],
    new_state: state,
    next_index: index + 1
  }
end
100+
# Consumes a token without adding a summary part and resets the parser to
# DEFAULT_STATE. +token+ is accepted but not read.
#
# @return [Hash] result hash advancing to +index+ + 1
def self.handle_collection_default(token, index)
  {
    processed: true,
    parts: [],
    new_state: DEFAULT_STATE,
    next_index: index + 1
  }
end
104+
# Tells whether +token+ has one of the types treated as a name:
# :identifier, :quoted_identifier or :string.
#
# @param token [#type]
# @return [Boolean]
def self.identifier_like?(token)
  case token.type
  when :identifier, :quoted_identifier, :string then true
  else false
  end
end
85108
# Tells whether an already-upcased keyword is one of TABLE_OBJECTS, i.e. a
# keyword that may also serve as a table/object name in some contexts.
#
# @param upcased_value [String]
# @return [Boolean]
def self.can_be_table_name?(upcased_value)
  case upcased_value
  when *TABLE_OBJECTS then true
  else false
  end
end
90113
91114 def self . calculate_alias_skip ( tokens , index )
92- if tokens [ index + 1 ] &.value &.upcase == 'AS'
115+ next_token = tokens [ index + 1 ]
116+ if next_token && next_token . value &.upcase == 'AS'
93117 2 # Skip 'AS' and the alias
94- elsif tokens [ index + 1 ] & .type == :identifier
118+ elsif next_token && next_token . type == :identifier
95119 1 # Skip the alias
96120 else
97121 0
@@ -103,29 +127,31 @@ def self.add_to_summary(part, new_state, next_index)
103127 end
104128
# Builds the result for a keyword that adds nothing to the summary but makes
# the parser expect a collection name next.
#
# @param next_index [Integer] index of the next token to process
# @return [Hash] result hash with :new_state set to EXPECT_COLLECTION_STATE
def self.trigger_collection_mode(next_index)
  {
    processed: true,
    parts: [],
    new_state: EXPECT_COLLECTION_STATE,
    next_index: next_index
  }
end
108132
# Builds the result for a token this handler declined: no summary parts and
# the state passed through unchanged.
#
# @param current_state [Symbol] state to carry forward
# @param next_index [Integer] index reported as the next token to process
# @return [Hash] result hash with :processed set to false
def self.not_processed(current_state, next_index)
  {
    processed: false,
    parts: [],
    new_state: current_state,
    next_index: next_index
  }
end
112136
# Handles a UNION keyword, merging a directly following ALL (any letter case)
# into a single summary part.
#
# @param token [#value] the UNION token
# @param tokens [Array] the full token list, used to peek at the next token
# @param index [Integer] position of +token+ within +tokens+
# @return [Hash] result hash; skips two tokens when ALL was consumed
def self.handle_union(token, tokens, index)
  following = tokens[index + 1]
  followed_by_all = following && following.value&.upcase == 'ALL'
  return add_to_summary(token.value, DEFAULT_STATE, index + 1) unless followed_by_all

  # Both tokens keep their original spelling in the summary.
  combined = "#{token.value} #{following.value}"
  { processed: true, parts: [combined], new_state: DEFAULT_STATE, next_index: index + 2 }
end
120145
# Handles CREATE/ALTER/DROP/TRUNCATE-style keywords.
#
# When the next token is one of TABLE_OBJECTS (compared case-insensitively),
# both tokens form one summary part and the parser then expects the object
# name. Otherwise only the operation keyword is passed to add_to_summary.
#
# @param token [#value] the operation token
# @param tokens [Array] the full token list, used to peek at the next token
# @param index [Integer] position of +token+ within +tokens+
# @return [Hash] result hash with :processed, :parts, :new_state and :next_index
def self.handle_table_operation(token, tokens, index)
  object_keyword = tokens[index + 1]&.value&.upcase
  # TABLE_OBJECTS is the single source of truth for object keywords, shared
  # with can_be_table_name?, so the two lists cannot drift apart.
  return add_to_summary(token.value, DEFAULT_STATE, index + 1) unless TABLE_OBJECTS.include?(object_keyword)

  # The object keyword is emitted upcased; the operation keeps its original spelling.
  {
    processed: true,
    parts: ["#{token.value} #{object_keyword}"],
    new_state: EXPECT_COLLECTION_STATE,
    next_index: index + 2
  }
end
131157 end
0 commit comments