Skip to content

Commit 784f1a4

Browse files
committed
Refactoring, cache tests
1 parent 56be581 commit 784f1a4

File tree

7 files changed

+187
-142
lines changed

7 files changed

+187
-142
lines changed

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary.rb

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#
55
# SPDX-License-Identifier: Apache-2.0
66

7-
require_relative 'query_summary/cache'
87
require_relative 'query_summary/tokenizer'
8+
require_relative 'query_summary/cache'
99
require_relative 'query_summary/parser'
1010

1111
module OpenTelemetry
@@ -17,17 +17,25 @@ module Helpers
1717
# QuerySummary.generate_summary("SELECT * FROM users WHERE id = 1")
1818
# # => "SELECT users"
1919
module QuerySummary
20-
def self.configure_cache(size: Cache::DEFAULT_SIZE)
21-
Cache.configure(size: size)
22-
end
20+
class << self
21+
def configure_cache(size: Cache::DEFAULT_SIZE)
22+
cache_instance.configure(size: size)
23+
end
24+
25+
def generate_summary(query)
26+
cache_instance.fetch(query) do
27+
tokens = Tokenizer.tokenize(query)
28+
Parser.build_summary_from_tokens(tokens)
29+
end
30+
rescue StandardError
31+
'UNKNOWN'
32+
end
33+
34+
private
2335

24-
def self.generate_summary(query)
25-
Cache.fetch(query) do
26-
tokens = Tokenizer.tokenize(query)
27-
Parser.build_summary_from_tokens(tokens)
36+
def cache_instance
37+
@cache_instance ||= Cache.new
2838
end
29-
rescue StandardError
30-
'UNKNOWN'
3139
end
3240
end
3341
end

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/cache.rb

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# Copyright The OpenTelemetry Authors
44
#
5-
# SPDX-License-Identifier: Apache-2.0module OpenTelemetry
5+
# SPDX-License-Identifier: Apache-2.0
66

77
module OpenTelemetry
88
module Helpers
@@ -13,39 +13,43 @@ module QuerySummary
1313
# Uses mutex synchronization for thread safety.
1414
#
1515
# @example
16-
# Cache.fetch("SELECT * FROM users") { "SELECT users" } # => "SELECT users"
16+
# cache = Cache.new
17+
# cache.fetch("SELECT * FROM users") { "SELECT users" } # => "SELECT users"
1718
class Cache
1819
DEFAULT_SIZE = 1000
1920

20-
@cache = {}
21-
@cache_mutex = Mutex.new
22-
@cache_size = DEFAULT_SIZE
21+
def initialize(size: DEFAULT_SIZE)
22+
@cache = {}
23+
@cache_mutex = Mutex.new
24+
@cache_size = size
25+
end
2326

24-
def self.fetch(key)
27+
def fetch(key)
2528
@cache_mutex.synchronize do
2629
return @cache[key] if @cache.key?(key)
2730

2831
result = yield
29-
@cache.shift if @cache.size >= @cache_size
32+
evict_if_needed
3033
@cache[key] = result
3134
result
3235
end
3336
end
3437

35-
def self.configure(size: DEFAULT_SIZE)
36-
@cache_mutex.synchronize do
37-
@cache_size = size
38-
@cache.clear if @cache.size > size
39-
end
38+
private
39+
40+
def configure(size: DEFAULT_SIZE)
41+
@cache_size = size
42+
@cache.clear if @cache.size > size
4043
end
4144

42-
def self.store(key, value)
43-
@cache_mutex.synchronize do
44-
@cache.shift if @cache.size >= @cache_size
45-
@cache[key] = value
46-
end
45+
def clear
46+
@cache.clear
47+
end
48+
49+
def evict_if_needed
50+
@cache.shift if @cache.size >= @cache_size
4751
end
4852
end
4953
end
5054
end
51-
end
55+
end

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/parser.rb

Lines changed: 101 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -25,133 +25,135 @@ class Parser
2525
TABLE_OPERATIONS = %w[CREATE ALTER DROP TRUNCATE].freeze
2626
TABLE_OBJECTS = %w[TABLE INDEX PROCEDURE VIEW DATABASE].freeze
2727

28-
def self.build_summary_from_tokens(tokens)
29-
summary_parts = []
30-
state = DEFAULT_STATE
31-
skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
28+
class << self
29+
def build_summary_from_tokens(tokens)
30+
summary_parts = []
31+
state = DEFAULT_STATE
32+
skip_until = 0 # Next token index to process; allows skipping tokens already consumed by previous operations
3233

33-
tokens.each_with_index do |token, index|
34-
next if index < skip_until # Skip already processed tokens
34+
tokens.each_with_index do |token, index|
35+
next if index < skip_until # Skip already processed tokens
3536

36-
result = process_token(token, tokens, index, state)
37+
result = process_token(token, tokens, index, state)
3738

38-
summary_parts.concat(result[:parts])
39-
state = result[:new_state]
40-
skip_until = result[:next_index]
41-
end
39+
summary_parts.concat(result[:parts])
40+
state = result[:new_state]
41+
skip_until = result[:next_index]
42+
end
4243

43-
summary_parts.join(' ')
44-
end
44+
summary_parts.join(' ')
45+
end
4546

46-
def self.process_token(token, tokens, index, state)
47-
operation_result = process_main_operation(token, tokens, index, state)
48-
return operation_result if operation_result[:processed]
47+
def process_token(token, tokens, index, state)
48+
operation_result = process_main_operation(token, tokens, index, state)
49+
return operation_result if operation_result[:processed]
4950

50-
collection_result = process_collection_token(token, tokens, index, state)
51-
return collection_result if collection_result[:processed]
51+
collection_result = process_collection_token(token, tokens, index, state)
52+
return collection_result if collection_result[:processed]
5253

53-
{ processed: false, parts: [], new_state: state, next_index: index + 1 }
54-
end
54+
{ processed: false, parts: [], new_state: state, next_index: index + 1 }
55+
end
5556

56-
def self.process_main_operation(token, tokens, index, current_state)
57-
upcased_value = token.value.upcase
58-
59-
case upcased_value
60-
when *MAIN_OPERATIONS
61-
add_to_summary(token.value, DEFAULT_STATE, index + 1)
62-
when *COLLECTION_OPERATIONS
63-
add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
64-
when *TRIGGER_COLLECTION
65-
trigger_collection_mode(index + 1)
66-
when *TABLE_OPERATIONS
67-
handle_table_operation(token, tokens, index)
68-
when 'UNION'
69-
handle_union(token, tokens, index)
70-
else
71-
not_processed(current_state, index + 1)
57+
def process_main_operation(token, tokens, index, current_state)
58+
upcased_value = token.value.upcase
59+
60+
case upcased_value
61+
when *MAIN_OPERATIONS
62+
add_to_summary(token.value, DEFAULT_STATE, index + 1)
63+
when *COLLECTION_OPERATIONS
64+
add_to_summary(token.value, EXPECT_COLLECTION_STATE, index + 1)
65+
when *TRIGGER_COLLECTION
66+
trigger_collection_mode(index + 1)
67+
when *TABLE_OPERATIONS
68+
handle_table_operation(token, tokens, index)
69+
when 'UNION'
70+
handle_union(token, tokens, index)
71+
else
72+
not_processed(current_state, index + 1)
73+
end
7274
end
73-
end
7475

75-
def self.process_collection_token(token, tokens, index, state)
76-
return not_processed(state, index + 1) unless state == EXPECT_COLLECTION_STATE
76+
def process_collection_token(token, tokens, index, state)
77+
return not_processed(state, index + 1) unless state == EXPECT_COLLECTION_STATE
7778

78-
upcased_value = token.value.upcase
79+
upcased_value = token.value.upcase
7980

80-
if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
81-
handle_collection_identifier(token, tokens, index)
82-
elsif token.value == '(' || token.type == :operator
83-
handle_collection_operator(token, state, index)
84-
else
85-
handle_collection_default(token, index)
81+
if identifier_like?(token) || (token.type == :keyword && can_be_table_name?(upcased_value))
82+
handle_collection_identifier(token, tokens, index)
83+
elsif token.value == '(' || token.type == :operator
84+
handle_collection_operator(token, state, index)
85+
else
86+
handle_collection_default(token, index)
87+
end
8688
end
87-
end
8889

89-
def self.handle_collection_identifier(token, tokens, index)
90-
skip_count = calculate_alias_skip(tokens, index)
91-
new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
92-
skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
90+
def handle_collection_identifier(token, tokens, index)
91+
skip_count = calculate_alias_skip(tokens, index)
92+
new_state = tokens[index + 1 + skip_count]&.value == ',' ? EXPECT_COLLECTION_STATE : DEFAULT_STATE
93+
skip_count += 1 if tokens[index + 1 + skip_count]&.value == ','
9394

94-
{ processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
95-
end
95+
{ processed: true, parts: [token.value], new_state: new_state, next_index: index + 1 + skip_count }
96+
end
9697

97-
def self.handle_collection_operator(token, state, index)
98-
{ processed: true, parts: [], new_state: state, next_index: index + 1 }
99-
end
98+
def handle_collection_operator(token, state, index)
99+
{ processed: true, parts: [], new_state: state, next_index: index + 1 }
100+
end
100101

101-
def self.handle_collection_default(token, index)
102-
{ processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
103-
end
102+
def handle_collection_default(token, index)
103+
{ processed: true, parts: [], new_state: DEFAULT_STATE, next_index: index + 1 }
104+
end
104105

105-
def self.identifier_like?(token)
106-
%i[identifier quoted_identifier string].include?(token.type)
107-
end
106+
def identifier_like?(token)
107+
%i[identifier quoted_identifier string].include?(token.type)
108+
end
108109

109-
def self.can_be_table_name?(upcased_value)
110-
# Keywords that can also be used as table/object names in certain contexts
111-
TABLE_OBJECTS.include?(upcased_value)
112-
end
110+
def can_be_table_name?(upcased_value)
111+
# Keywords that can also be used as table/object names in certain contexts
112+
TABLE_OBJECTS.include?(upcased_value)
113+
end
113114

114-
def self.calculate_alias_skip(tokens, index)
115-
next_token = tokens[index + 1]
116-
if next_token && next_token.value&.upcase == 'AS'
117-
2 # Skip 'AS' and the alias
118-
elsif next_token && next_token.type == :identifier
119-
1 # Skip the alias
120-
else
121-
0
115+
def calculate_alias_skip(tokens, index)
116+
next_token = tokens[index + 1]
117+
if next_token && next_token.value&.upcase == 'AS'
118+
2 # Skip 'AS' and the alias
119+
elsif next_token && next_token.type == :identifier
120+
1 # Skip the alias
121+
else
122+
0
123+
end
122124
end
123-
end
124125

125-
def self.add_to_summary(part, new_state, next_index)
126-
{ processed: true, parts: [part], new_state: new_state, next_index: next_index }
127-
end
126+
def add_to_summary(part, new_state, next_index)
127+
{ processed: true, parts: [part], new_state: new_state, next_index: next_index }
128+
end
128129

129-
def self.trigger_collection_mode(next_index)
130-
{ processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
131-
end
130+
def trigger_collection_mode(next_index)
131+
{ processed: true, parts: [], new_state: EXPECT_COLLECTION_STATE, next_index: next_index }
132+
end
132133

133-
def self.not_processed(current_state, next_index)
134-
{ processed: false, parts: [], new_state: current_state, next_index: next_index }
135-
end
134+
def not_processed(current_state, next_index)
135+
{ processed: false, parts: [], new_state: current_state, next_index: next_index }
136+
end
136137

137-
def self.handle_union(token, tokens, index)
138-
next_token = tokens[index + 1]
139-
if next_token && next_token.value&.upcase == 'ALL'
140-
{ processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
141-
else
142-
add_to_summary(token.value, DEFAULT_STATE, index + 1)
138+
def handle_union(token, tokens, index)
139+
next_token = tokens[index + 1]
140+
if next_token && next_token.value&.upcase == 'ALL'
141+
{ processed: true, parts: ["#{token.value} #{next_token.value}"], new_state: DEFAULT_STATE, next_index: index + 2 }
142+
else
143+
add_to_summary(token.value, DEFAULT_STATE, index + 1)
144+
end
143145
end
144-
end
145146

146-
def self.handle_table_operation(token, tokens, index)
147-
next_token_obj = tokens[index + 1]
148-
next_token = next_token_obj&.value&.upcase
147+
def handle_table_operation(token, tokens, index)
148+
next_token_obj = tokens[index + 1]
149+
next_token = next_token_obj&.value&.upcase
149150

150-
case next_token
151-
when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
152-
{ processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
153-
else
154-
add_to_summary(token.value, DEFAULT_STATE, index + 1)
151+
case next_token
152+
when 'TABLE', 'INDEX', 'PROCEDURE', 'VIEW', 'DATABASE'
153+
{ processed: true, parts: ["#{token.value} #{next_token}"], new_state: EXPECT_COLLECTION_STATE, next_index: index + 2 }
154+
else
155+
add_to_summary(token.value, DEFAULT_STATE, index + 1)
156+
end
155157
end
156158
end
157159
end

helpers/sql-obfuscation/lib/opentelemetry/helpers/query_summary/tokenizer.rb

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,23 +36,25 @@ class Tokenizer
3636

3737
EXCLUDED_TYPES = %i[whitespace comment].freeze
3838

39-
def self.tokenize(query)
40-
scanner = StringScanner.new(query)
41-
tokens = []
39+
class << self
40+
def tokenize(query)
41+
scanner = StringScanner.new(query)
42+
tokens = []
4243

43-
scan_next_token(scanner, tokens) until scanner.eos?
44+
scan_next_token(scanner, tokens) until scanner.eos?
4445

45-
tokens
46-
end
46+
tokens
47+
end
4748

48-
def self.scan_next_token(scanner, tokens)
49-
matched = TOKEN_REGEX.any? do |type, regex|
50-
next unless (value = scanner.scan(regex))
49+
def scan_next_token(scanner, tokens)
50+
matched = TOKEN_REGEX.any? do |type, regex|
51+
next unless (value = scanner.scan(regex))
5152

52-
tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
53-
true
53+
tokens << Token.new(type, value) unless EXCLUDED_TYPES.include?(type)
54+
true
55+
end
56+
scanner.getch unless matched
5457
end
55-
scanner.getch unless matched
5658
end
5759
end
5860
end

0 commit comments

Comments (0)