Skip to content

Commit c296a46

Browse files
committed
Add comments to parser
1 parent ee6ae1d commit c296a46

File tree

2 files changed

+450
-436
lines changed

2 files changed

+450
-436
lines changed

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,24 @@ module QuerySummary
1616
# tokens = [Token.new(:keyword, "SELECT"), Token.new(:identifier, "users")]
1717
# Parser.build_summary_from_tokens(tokens) # => "SELECT users"
1818
class Parser
19-
DEFAULT_STATE = :default
19+
# Two states: normal parsing vs. waiting for table names
20+
PARSING_STATE = :parsing
2021
EXPECT_COLLECTION_STATE = :expect_collection
2122

22-
MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze
23-
COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze
24-
TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze
25-
TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze
26-
TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
23+
MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze # Operations that start queries and need table names
24+
COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze # Operations that work with existing data and expect table names to follow
25+
TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze # Keywords that signal a table name is coming next
26+
TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze # Database structure operations that create, modify, or remove objects
27+
TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze # Types of database objects that can be created, modified, or removed
2728

2829
class << self
2930
def build_summary_from_tokens(tokens)
3031
summary_parts = []
31-
state = DEFAULT_STATE
32-
skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
32+
state = PARSING_STATE
33+
skip_until = 0 # Skip tokens we've already processed when looking ahead
3334

3435
tokens.each_with_index do |token, index|
35-
next if index < skip_until # Skip already processed tokens
36+
next if index < skip_until
3637

3738
result = process_token(token, tokens, index, state)
3839

@@ -59,11 +60,11 @@ def process_main_operation(token, tokens, index, current_state)
5960

6061
case upcased_value
6162
when *MAIN_OPERATIONS
62-
add_to_summary(token.value, DEFAULT_STATE, index + 1)
63+
add_to_summary(token.value, PARSING_STATE, index + 1)
6364
when *COLLECTION_OPERATIONS
6465
add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
6566
when *TRIGGER_COLLECTION
66-
trigger_collection_mode(index + 1)
67+
expect_table_names_next(index + 1)
6768
when *TABLE_OPERATIONS
6869
handle_table_operation(token, tokens, index)
6970
when 'UNION'
@@ -79,17 +80,19 @@ def process_collection_token(token, tokens, index, state)
7980
upcased_value = token.value.upcase
8081

8182
if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
82-
handle_collection_identifier(token, tokens, index)
83+
process_table_name_and_alias(token, tokens, index)
8384
elsif token.value == '(' || token.type == :operator
8485
handle_collection_operator(token, state, index)
8586
else
86-
handle_collection_default(token, index)
87+
return_to_normal_parsing(token, index)
8788
end
8889
end
8990

90-
def handle_collection_identifier(token, tokens, index)
91+
def process_table_name_and_alias(token, tokens, index)
92+
# Look ahead to skip table aliases (e.g., "users u" or "users AS u")
9193
skip_count = calculate_alias_skip(tokens, index)
92-
new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
94+
# Check if there's a comma - if so, expect more table names in the list
95+
new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : PARSING_STATE
9396
skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
9497

9598
{ processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
@@ -99,25 +102,26 @@ def handle_collection_operator(token, state, index)
99102
{ processed: true, parts: [], new_state: state, next_index: index + 1 }
100103
end
101104

102-
def handle_collection_default(token, index)
103-
{ processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
105+
def return_to_normal_parsing(token, index)
106+
{ processed: true, parts: [], new_state: PARSING_STATE, next_index: index + 1 }
104107
end
105108

106109
def identifier_like?(token)
107110
%i[identifier quoted_identifier string].include?(token.type)
108111
end
109112

110113
def can_be_table_name?(upcased_value)
111-
# Keywords that can also be used as table/object names in certain contexts
114+
# Object types that can appear after DDL operations
112115
TABLE_OBJECTS.include?(upcased_value)
113116
end
114117

115118
def calculate_alias_skip(tokens, index)
119+
# Handle both "table AS alias" and "table alias" patterns
116120
next_token = tokens[index + 1]
117121
if next_token && next_token.value&.upcase == 'AS'
118-
2 # Skip 'AS' and the alias
122+
2
119123
elsif next_token && next_token.type == :identifier
120-
1 # Skip the alias
124+
1
121125
else
122126
0
123127
end
@@ -127,7 +131,7 @@ def add_to_summary(part, new_state, next_index)
127131
{ processed: true, parts: [part], new_state: new_state, next_index: next_index }
128132
end
129133

130-
def trigger_collection_mode(next_index)
134+
def expect_table_names_next(next_index)
131135
{ processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
132136
end
133137

@@ -138,21 +142,22 @@ def not_processed(current_state, next_index)
138142
def handle_union(token, tokens, index)
139143
next_token = tokens[index + 1]
140144
if next_token && next_token.value&.upcase == 'ALL'
141-
{ processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
145+
{ processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: PARSING_STATE, next_index: index + 2 }
142146
else
143-
add_to_summary(token.value, DEFAULT_STATE, index + 1)
147+
add_to_summary(token.value, PARSING_STATE, index + 1)
144148
end
145149
end
146150

147151
def handle_table_operation(token, tokens, index)
152+
# Combine DDL operations with object types: "CREATE TABLE", "DROP INDEX", etc.
148153
next_token_obj = tokens[index + 1]
149154
next_token = next_token_obj&.value&.upcase
150155

151156
case next_token
152157
when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
153158
{ processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
154159
else
155-
add_to_summary(token.value, DEFAULT_STATE, index + 1)
160+
add_to_summary(token.value, PARSING_STATE, index + 1)
156161
end
157162
end
158163
end

0 commit comments

Comments (0)