Skip to content

Commit 08a8964

Browse files
committed
Code and test updates
1 parent 63fec36 commit 08a8964

File tree

4 files changed

+115
-39
lines changed

4 files changed

+115
-39
lines changed

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ class Cache
2222
@cache_size = DEFAULT_SIZE
2323

2424
def self.fetch(key)
25-
return @cache[key] if @cache.key?(key)
25+
@cache_mutex.synchronize do
26+
return @cache[key] if @cache.key?(key)
2627

27-
result = yield
28-
store(key, result)
29-
result
28+
result = yield
29+
@cache.shift if @cache.size >= @cache_size
30+
@cache[key] = result
31+
result
32+
end
3033
end
3134

3235
def self.configure(size: DEFAULT_SIZE)

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,18 @@ module QuerySummary
1616
# tokens = [Token.new(:keyword, "SELECT"), Token.new(:identifier, "users")]
1717
# Parser.build_summary_from_tokens(tokens) # => "SELECT users"
1818
class Parser
19+
DEFAULT_STATE = :default
20+
EXPECT_COLLECTION_STATE = :expect_collection
21+
22+
MAIN_OPERATIONS = %w[SELECT INSERT DELETE].freeze
23+
COLLECTION_OPERATIONS = %w[WITH UPDATE].freeze
24+
TRIGGER_COLLECTION = %w[FROM INTO JOIN IN].freeze
25+
TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze
26+
TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
27+
1928
def self.build_summary_from_tokens(tokens)
2029
summary_parts = []
21-
state = :default # Either :default or :expect_collection
30+
state = DEFAULT_STATE
2231
skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
2332

2433
tokens.each_with_index do |token, index|
@@ -45,14 +54,16 @@ def self.process_token(token, tokens, index, state)
4554
end
4655

4756
def self.process_main_operation(token, tokens, index, current_state)
48-
case token.value.upcase
49-
when 'SELECT', 'INSERT', 'DELETE'
50-
add_to_summary(token.value, :default, index + 1)
51-
when 'WITH', 'UPDATE'
52-
add_to_summary(token.value, :expect_collection, index + 1)
53-
when 'FROM', 'INTO', 'JOIN', 'IN'
57+
upcased_value = token.value.upcase
58+
59+
case upcased_value
60+
when *MAIN_OPERATIONS
61+
add_to_summary(token.value, DEFAULT_STATE, index + 1)
62+
when *COLLECTION_OPERATIONS
63+
add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
64+
when *TRIGGER_COLLECTION
5465
trigger_collection_mode(index + 1)
55-
when 'CREATE', 'ALTER', 'DROP', 'TRUNCATE'
66+
when *TABLE_OPERATIONS
5667
handle_table_operation(token, tokens, index)
5768
when 'UNION'
5869
handle_union(token, tokens, index)
@@ -62,36 +73,49 @@ def self.process_main_operation(token, tokens, index, current_state)
6273
end
6374

6475
def self.process_collection_token(token, tokens, index, state)
65-
return { processed: false, parts: [], new_state: state, next_index: index + 1 } unless state == :expect_collection
76+
return not_processed(state, index + 1) unless state == EXPECT_COLLECTION_STATE
6677

6778
upcased_value = token.value.upcase
6879

6980
if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
70-
skip_count = calculate_alias_skip(tokens, index)
71-
new_state = tokens[index + 1 + skip_count]&.value == ',' ? :expect_collection : :default
72-
skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
73-
74-
{ processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
81+
handle_collection_identifier(token, tokens, index)
7582
elsif token.value == '(' || token.type == :operator
76-
{ processed: true, parts: [], new_state: state, next_index: index + 1 }
83+
handle_collection_operator(token, state, index)
7784
else
78-
{ processed: true, parts: [], new_state: :default, next_index: index + 1 }
85+
handle_collection_default(token, index)
7986
end
8087
end
8188

89+
def self.handle_collection_identifier(token, tokens, index)
90+
skip_count = calculate_alias_skip(tokens, index)
91+
new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
92+
skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
93+
94+
{ processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
95+
end
96+
97+
def self.handle_collection_operator(token, state, index)
98+
{ processed: true, parts: [], new_state: state, next_index: index + 1 }
99+
end
100+
101+
def self.handle_collection_default(token, index)
102+
{ processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
103+
end
104+
82105
def self.identifier_like?(token)
83106
%i[identifier quoted_identifier string].include?(token.type)
84107
end
85108

86109
def self.can_be_table_name?(upcased_value)
87110
# Keywords that can also be used as table/object names in certain contexts
88-
%w[TABLE INDEX PROCEDURE VIEW DATABASE].include?(upcased_value)
111+
TABLE_OBJECTS.include?(upcased_value)
89112
end
90113

91114
def self.calculate_alias_skip(tokens, index)
92-
if tokens[index + 1]&.value&.upcase == 'AS'
115+
next_token = tokens[index + 1]
116+
if next_token && next_token.value&.upcase == 'AS'
93117
2 # Skip 'AS' and the alias
94-
elsif tokens[index + 1]&.type == :identifier
118+
elsif next_token && next_token.type == :identifier
95119
1 # Skip the alias
96120
else
97121
0
@@ -103,29 +127,31 @@ def self.add_to_summary(part, new_state, next_index)
103127
end
104128

105129
def self.trigger_collection_mode(next_index)
106-
{ processed: true, parts: [], new_state: :expect_collection, next_index: next_index }
130+
{ processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
107131
end
108132

109133
def self.not_processed(current_state, next_index)
110134
{ processed: false, parts: [], new_state: current_state, next_index: next_index }
111135
end
112136

113137
def self.handle_union(token, tokens, index)
114-
if tokens[index + 1]&.value&.upcase == 'ALL'
115-
{ processed: true, parts: ["#{token.value} #{tokens[index + 1].value}"], new_state: :default, next_index: index + 2 }
138+
next_token = tokens[index + 1]
139+
if next_token && next_token.value&.upcase == 'ALL'
140+
{ processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
116141
else
117-
add_to_summary(token.value, :default, index + 1)
142+
add_to_summary(token.value, DEFAULT_STATE, index + 1)
118143
end
119144
end
120145

121146
def self.handle_table_operation(token, tokens, index)
122-
next_token = tokens[index + 1]&.value&.upcase
147+
next_token_obj = tokens[index + 1]
148+
next_token = next_token_obj&.value&.upcase
123149

124150
case next_token
125151
when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
126-
{ processed: true, parts: ["#{token.value} #{next_token}"], new_state: :expect_collection, next_index: index + 2 }
152+
{ processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
127153
else
128-
add_to_summary(token.value, :default, index + 1)
154+
add_to_summary(token.value, DEFAULT_STATE, index + 1)
129155
end
130156
end
131157
end

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,20 @@ def self.tokenize(query)
4040
scanner = StringScanner.new(query)
4141
tokens = []
4242

43-
until scanner.eos?
44-
matched = TOKEN_REGEX.any? do |type, regex|
45-
next unless (value = scanner.scan(regex))
46-
47-
tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
48-
true
49-
end
50-
scanner.getch unless matched
51-
end
43+
scan_next_token(scanner, tokens) until scanner.eos?
5244

5345
tokens
5446
end
47+
48+
def self.scan_next_token(scanner, tokens)
49+
matched = TOKEN_REGEX.any? do |type, regex|
50+
next unless (value = scanner.scan(regex))
51+
52+
tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
53+
true
54+
end
55+
scanner.getch unless matched
56+
end
5557
end
5658
end
5759
end

helpers/sql-obfuscation/test/fixtures/query_summary.json

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,24 @@
1717
"db.query.summary": "SELECT"
1818
}
1919
},
20+
{
21+
"name": "nil_input",
22+
"input": {
23+
"query": null
24+
},
25+
"expected": {
26+
"db.query.summary": "UNKNOWN"
27+
}
28+
},
29+
{
30+
"name": "deeply_nested_subqueries",
31+
"input": {
32+
"query": "SELECT * FROM (SELECT * FROM (SELECT * FROM my_table))"
33+
},
34+
"expected": {
35+
"db.query.summary": "SELECT SELECT SELECT my_table"
36+
}
37+
},
2038
{
2139
"name": "numeric_literal_with_decimal_point",
2240
"input": {
@@ -368,6 +386,33 @@
368386
"expected": {
369387
"db.query.summary": "SELECT products"
370388
}
389+
},
390+
{
391+
"name": "create_index",
392+
"input": {
393+
"query": "CREATE INDEX idx_name ON MyTable (column1)"
394+
},
395+
"expected": {
396+
"db.query.summary": "CREATE INDEX idx_name"
397+
}
398+
},
399+
{
400+
"name": "create_database",
401+
"input": {
402+
"query": "CREATE DATABASE my_db"
403+
},
404+
"expected": {
405+
"db.query.summary": "CREATE DATABASE my_db"
406+
}
407+
},
408+
{
409+
"name": "create_procedure",
410+
"input": {
411+
"query": "CREATE PROCEDURE my_proc AS BEGIN SELECT * FROM MyTable END"
412+
},
413+
"expected": {
414+
"db.query.summary": "CREATE PROCEDURE my_proc SELECT MyTable"
415+
}
371416
}
372417
]
373418

0 commit comments

Comments
 (0)