Skip to content

Commit 61451b5

Browse files
feat: Refactor SQL processor file structure for clarity (#1731)
* Refactor file structure for clarity * Update require * Update tests * Rubocop: whitespace --------- Co-authored-by: Kayla Reopelle <[email protected]>
1 parent b20149f commit 61451b5

File tree

5 files changed

+170
-138
lines changed

5 files changed

+170
-138
lines changed

helpers/sql-processor/lib/opentelemetry/helpers.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# SPDX-License-Identifier: Apache-2.0
66

7-
require 'opentelemetry/helpers/sql_obfuscation'
7+
require 'opentelemetry/helpers/sql_processor'
88

99
module OpenTelemetry
1010
# The helpers module contains functionality shared across multiple

helpers/sql-processor/lib/opentelemetry/helpers/sql_obfuscation.rb

Lines changed: 0 additions & 130 deletions
This file was deleted.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright The OpenTelemetry Authors
4+
#
5+
# SPDX-License-Identifier: Apache-2.0
6+
7+
require 'opentelemetry-common'
8+
require_relative 'sql_processor/obfuscator'
9+
10+
module OpenTelemetry
11+
module Helpers
12+
# SQL processing utilities for OpenTelemetry instrumentation.
13+
#
14+
# This module provides a unified interface for SQL processing operations
15+
# commonly needed in database adapter instrumentation, including SQL obfuscation.
16+
#
17+
# @api public
18+
module SqlProcessor
19+
module_function
20+
21+
# Obfuscates a SQL statement for use in database adapter instrumentation. This is a thin public wrapper that forwards all arguments to {Obfuscator.obfuscate_sql}.
22+
#
23+
# @param sql [String] The SQL to obfuscate.
24+
# @param obfuscation_limit [optional Integer] the maximum statement length (in characters) that will be obfuscated; longer statements are not obfuscated. Defaults to 2000.
25+
# @param adapter [optional Symbol] the type of database adapter calling the method. `:default`, `:mysql`, `:postgres`, `:sqlite`, `:oracle`, `:cassandra` are supported. Defaults to `:default`.
26+
# @return [String] The SQL query string where the values are replaced with "?". When the sql statement is longer than the obfuscation limit,
27+
# the SQL is not returned at all; a message stating that the query was not obfuscated is returned instead. If quote or comment delimiters
28+
# remain after obfuscation, a string describing the failure is returned.
29+
#
30+
# @api public
31+
def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default)
32+
Obfuscator.obfuscate_sql(sql, obfuscation_limit: obfuscation_limit, adapter: adapter)
33+
end
34+
end
35+
end
36+
end
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright The OpenTelemetry Authors
4+
#
5+
# SPDX-License-Identifier: Apache-2.0
6+
7+
require 'opentelemetry-common'
8+
9+
module OpenTelemetry
10+
module Helpers
11+
module SqlProcessor
12+
#
13+
# This module contains SQL obfuscation behavior to share with
14+
# instrumentation for specific database adapters.
15+
# This module uses code from: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb
16+
#
17+
# To use this in your instrumentation, the `Instrumentation` class for
18+
# your gem must contain configuration options for:
19+
# * `:db_statement`
20+
# Example:
21+
# `option :db_statement, default: :include, validate: %I[omit include obfuscate]`
22+
# * `:obfuscation_limit`
23+
# Example:
24+
# `option :obfuscation_limit, default: 2000, validate: :integer`
25+
#
26+
# If you want to add support for a new adapter, update the following
27+
# constants to include keys for your adapter:
28+
# * DIALECT_COMPONENTS
29+
# * CLEANUP_REGEX
30+
# You must also add a new constant that uses `generate_regex` with your
31+
# adapter's dialect components that is named like
32+
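# `<ADAPTER>_COMPONENTS_REGEX`, such as: `MYSQL_COMPONENTS_REGEX`, and add a matching `when` branch to the `case adapter` in `obfuscate_sql` (adapters without a branch fall back to `DEFAULT_COMPONENTS_REGEX`).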
33+
#
34+
# @api public
35+
module Obfuscator
  module_function

  # From: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb
  #
  # Maps each kind of SQL component that may carry sensitive data to the
  # pattern that matches it.
  COMPONENTS_REGEX_MAP = {
    single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/,
    double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/,
    dollar_quotes: /(\$(?!\d)[^$]*?\$).*?(?:\1|$)/,
    uuids: /\{?(?:[0-9a-fA-F]\-*){32}\}?/,
    numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/,
    boolean_literals: /\b(?:true|false|null)\b/i,
    hexadecimal_literals: /0x[0-9a-fA-F]+/,
    comments: /(?:#|--).*?(?=\r|\n|$)/i,
    multi_line_comments: %r{(?:\/\*.*?\*\/)}m,
    oracle_quoted_strings: /q'\[.*?(?:\]'|$)|q'\{.*?(?:\}'|$)|q'\<.*?(?:\>'|$)|q'\(.*?(?:\)'|$)/
  }.freeze

  # Lists, per adapter, the keys of COMPONENTS_REGEX_MAP that apply to
  # that SQL dialect.
  DIALECT_COMPONENTS = {
    default: COMPONENTS_REGEX_MAP.keys,
    mysql: %i[single_quotes double_quotes numeric_literals boolean_literals
              hexadecimal_literals comments multi_line_comments],
    postgres: %i[single_quotes dollar_quotes uuids numeric_literals
                 boolean_literals comments multi_line_comments],
    sqlite: %i[single_quotes numeric_literals boolean_literals hexadecimal_literals
               comments multi_line_comments],
    oracle: %i[single_quotes oracle_quoted_strings numeric_literals comments
               multi_line_comments],
    cassandra: %i[single_quotes uuids numeric_literals boolean_literals
                  hexadecimal_literals comments multi_line_comments]
  }.freeze

  PLACEHOLDER = '?'

  # We use these to check whether the query contains any quote characters
  # after obfuscation. If so, that's a good indication that the original
  # query was malformed, and so our obfuscation can't reliably find
  # literals. In such a case, we'll replace the entire query with a
  # placeholder.
  #
  # These are `%r{...}` literals, so the closing delimiter is `}`. A `/`
  # placed before it is part of the pattern, which would turn the last
  # alternative into `*//` (or `$.../` for postgres) and let a leftover
  # `*/` or `$` go undetected.
  CLEANUP_REGEX = {
    default: %r{'|"|\/\*|\*\/},
    mysql: %r{'|"|\/\*|\*\/},
    postgres: %r{'|\/\*|\*\/|\$(?!\?)},
    sqlite: %r{'|\/\*|\*\/},
    cassandra: %r{'|\/\*|\*\/},
    oracle: %r{'|\/\*|\*\/}
  }.freeze

  # Builds one pattern matching any component listed for the dialect.
  #
  # @param dialect [Symbol] a key of DIALECT_COMPONENTS
  # @return [Regexp] the union of the dialect's component patterns
  # @raise [KeyError] when the dialect has no DIALECT_COMPONENTS entry
  #
  # @api private
  def generate_regex(dialect)
    components = DIALECT_COMPONENTS.fetch(dialect)
    Regexp.union(COMPONENTS_REGEX_MAP.values_at(*components))
  end

  DEFAULT_COMPONENTS_REGEX = generate_regex(:default)
  MYSQL_COMPONENTS_REGEX = generate_regex(:mysql)
  POSTGRES_COMPONENTS_REGEX = generate_regex(:postgres)
  SQLITE_COMPONENTS_REGEX = generate_regex(:sqlite)
  CASSANDRA_COMPONENTS_REGEX = generate_regex(:cassandra)
  ORACLE_COMPONENTS_REGEX = generate_regex(:oracle)

  # Internal implementation of SQL obfuscation.
  # Use SqlProcessor.obfuscate_sql for the public API.
  #
  # @param sql [String] the SQL to obfuscate
  # @param obfuscation_limit [Integer] statements longer than this many
  #   characters are not obfuscated; a message is returned instead
  # @param adapter [Symbol] the calling database adapter; adapters without
  #   a dedicated entry are handled with the `:default` patterns
  # @return [String] the SQL with matched components replaced by "?", or a
  #   message when the statement exceeds the limit or when quote/comment
  #   delimiters remain after obfuscation. If an error is raised, it is
  #   reported through OpenTelemetry.handle_error and that call's result is
  #   returned.
  #
  # @api private
  def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default)
    return "SQL not obfuscated, query exceeds #{obfuscation_limit} characters" if sql.size > obfuscation_limit

    # NOTE(review): only :mysql and :postgres select a dialect-specific
    # pattern; every other adapter is obfuscated with the default pattern.
    components_regex = case adapter
                       when :mysql then MYSQL_COMPONENTS_REGEX
                       when :postgres then POSTGRES_COMPONENTS_REGEX
                       else DEFAULT_COMPONENTS_REGEX
                       end

    # Original MySQL UTF-8 Encoding Fixes:
    # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/160
    # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/345
    sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true)

    sql = sql.gsub(components_regex, PLACEHOLDER)

    # Mirror the fallback above: an adapter with no CLEANUP_REGEX entry is
    # checked with the default pattern instead of failing on nil.
    cleanup_regex = CLEANUP_REGEX.fetch(adapter, CLEANUP_REGEX[:default])
    return 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if cleanup_regex.match?(sql)

    sql
  rescue StandardError => e
    OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e)
  end
end
124+
end
125+
end
126+
end

helpers/sql-processor/test/helpers/sql_obfuscation_test.rb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,43 +9,43 @@
99

1010
require_relative '../test_helper'
1111

12-
class SqlObfuscationTest < Minitest::Test
12+
class SqlProcessorTest < Minitest::Test
1313
def test_named_arg_defaults_obfuscates
1414
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]'"
1515
expected = 'SELECT * from users where users.id = ? and users.email = ?'
16-
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql)
16+
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql)
1717

1818
assert_equal(expected, result)
1919
end
2020

2121
def test_obfuscation_returns_message_when_limit_is_reached
2222
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]'"
2323
expected = 'SQL not obfuscated, query exceeds 42 characters'
24-
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, obfuscation_limit: 42)
24+
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, obfuscation_limit: 42)
2525

2626
assert_equal(expected, result)
2727
end
2828

2929
def test_non_utf_8_encoded_string_obfuscates_with_mysql
3030
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]\255'"
3131
expected = 'SELECT * from users where users.id = ? and users.email = ?'
32-
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :mysql)
32+
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, adapter: :mysql)
3333

3434
assert_equal(expected, result)
3535
end
3636

3737
def test_non_utf_8_encoded_string_obfuscates_with_postgres
3838
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]\255'"
3939
expected = 'SELECT * from users where users.id = ? and users.email = ?'
40-
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :postgres)
40+
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, adapter: :postgres)
4141

4242
assert_equal(expected, result)
4343
end
4444

4545
def test_statement_with_emoji_encodes_utf_8_and_obfuscates
4646
sql = "SELECT * from users where users.id = 1 and users.email = 'test@😄.com'"
4747
expected = 'SELECT * from users where users.id = ? and users.email = ?'
48-
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql)
48+
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql)
4949

5050
assert_equal(expected, result)
5151
end
@@ -89,7 +89,7 @@ def self.load_fixture
8989

9090
dialects.each do |dialect|
9191
define_method(:"test_sql_obfuscation_#{name}_#{dialect}") do
92-
actual_obfuscated = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(query, adapter: dialect.to_sym)
92+
actual_obfuscated = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(query, adapter: dialect.to_sym)
9393
message = build_failure_message(query, dialect, acceptable_outputs, actual_obfuscated)
9494

9595
assert_includes(acceptable_outputs, actual_obfuscated, message)

0 commit comments

Comments
 (0)