From 82e820552bbd9e67c287a03212181795fe931277 Mon Sep 17 00:00:00 2001 From: Hannah Ramadan Date: Thu, 9 Oct 2025 14:46:54 -0700 Subject: [PATCH 1/4] Refactor file structure for clarity --- .../opentelemetry/helpers/sql_obfuscation.rb | 130 ------------------ .../opentelemetry/helpers/sql_processor.rb | 36 +++++ .../helpers/sql_processor/obfuscator.rb | 126 +++++++++++++++++ 3 files changed, 162 insertions(+), 130 deletions(-) delete mode 100644 helpers/sql-processor/lib/opentelemetry/helpers/sql_obfuscation.rb create mode 100644 helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb create mode 100644 helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb diff --git a/helpers/sql-processor/lib/opentelemetry/helpers/sql_obfuscation.rb b/helpers/sql-processor/lib/opentelemetry/helpers/sql_obfuscation.rb deleted file mode 100644 index debb2b7e5c..0000000000 --- a/helpers/sql-processor/lib/opentelemetry/helpers/sql_obfuscation.rb +++ /dev/null @@ -1,130 +0,0 @@ -# frozen_string_literal: true - -# Copyright The OpenTelemetry Authors -# -# SPDX-License-Identifier: Apache-2.0module OpenTelemetry - -require 'opentelemetry-common' - -module OpenTelemetry - module Helpers - # - # This module contains SQL obfuscation behavior to share with - # instrumentation for specific database adapters. 
- # The class uses code from: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb - # - # To use this in your instrumentation, the `Instrumentation` class for - # your gem must contain configuration options for: - # * `:db_statement` - # Example: - # `option :db_statement, default: :include, validate: %I[omit include obfuscate]` - # * `:obfuscation_limit` - # Example: - # `option :obfuscation_limit, default: 2000, validate: :integer` - # - # If you want to add support for a new adapter, update the following - # constants to include keys for your adapter: - # * DIALECT_COMPONENTS - # * CLEANUP_REGEX - # You must also add a new constant that uses `generate_regex` with your - # adapter's dialect components that is named like - # `_COMPONENTS_REGEX`, such as: `MYSQL_COMPONENTS_REGEX`. - # - # @api public - module SqlObfuscation - module_function - - # From: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb - COMPONENTS_REGEX_MAP = { - single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/, - double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/, - dollar_quotes: /(\$(?!\d)[^$]*?\$).*?(?:\1|$)/, - uuids: /\{?(?:[0-9a-fA-F]\-*){32}\}?/, - numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/, - boolean_literals: /\b(?:true|false|null)\b/i, - hexadecimal_literals: /0x[0-9a-fA-F]+/, - comments: /(?:#|--).*?(?=\r|\n|$)/i, - multi_line_comments: %r{(?:\/\*.*?\*\/)}m, - oracle_quoted_strings: /q'\[.*?(?:\]'|$)|q'\{.*?(?:\}'|$)|q'\<.*?(?:\>'|$)|q'\(.*?(?:\)'|$)/ - }.freeze - - DIALECT_COMPONENTS = { - default: COMPONENTS_REGEX_MAP.keys, - mysql: %i[single_quotes double_quotes numeric_literals boolean_literals - hexadecimal_literals comments multi_line_comments], - postgres: %i[single_quotes dollar_quotes uuids numeric_literals - boolean_literals comments multi_line_comments], - sqlite: 
%i[single_quotes numeric_literals boolean_literals hexadecimal_literals - comments multi_line_comments], - oracle: %i[single_quotes oracle_quoted_strings numeric_literals comments - multi_line_comments], - cassandra: %i[single_quotes uuids numeric_literals boolean_literals - hexadecimal_literals comments multi_line_comments] - }.freeze - - PLACEHOLDER = '?' - - # We use these to check whether the query contains any quote characters - # after obfuscation. If so, that's a good indication that the original - # query was malformed, and so our obfuscation can't reliably find - # literals. In such a case, we'll replace the entire query with a - # placeholder. - CLEANUP_REGEX = { - default: %r{'|"|\/\*|\*\/}, - mysql: %r{'|"|\/\*|\*\//}, - postgres: %r{'|\/\*|\*\/|\$(?!\?)/}, - sqlite: %r{'|\/\*|\*\//}, - cassandra: %r{'|\/\*|\*\//}, - oracle: %r{'|\/\*|\*\//} - }.freeze - - # @api private - def generate_regex(dialect) - components = DIALECT_COMPONENTS[dialect] - Regexp.union(components.map { |component| COMPONENTS_REGEX_MAP[component] }) - end - - DEFAULT_COMPONENTS_REGEX = generate_regex(:default) - MYSQL_COMPONENTS_REGEX = generate_regex(:mysql) - POSTGRES_COMPONENTS_REGEX = generate_regex(:postgres) - SQLITE_COMPONENTS_REGEX = generate_regex(:sqlite) - CASSANDRA_COMPONENTS_REGEX = generate_regex(:cassandra) - ORACLE_COMPONENTS_REGEX = generate_regex(:oracle) - - # This is a SQL obfuscation utility intended for use in database adapter instrumentation. - # - # @param sql [String] The SQL to obfuscate. - # @param obfuscation_limit [optional Integer] the length at which the SQL string will not be obfuscated - # @param adapter [optional Symbol] the type of database adapter calling the method. `:default`, `:mysql` and `:postgres` are supported. - # @return [String] The SQL query string where the values are replaced with "?". 
When the sql statement exceeds the obufscation limit - # the first matched pair from the SQL statement will be returned, with an appended truncation message. If trunaction is unsuccessful, - # a string describing the error will be returned. - # - # @api public - def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default) - return "SQL not obfuscated, query exceeds #{obfuscation_limit} characters" if sql.size > obfuscation_limit - - regex = case adapter - when :mysql - MYSQL_COMPONENTS_REGEX - when :postgres - POSTGRES_COMPONENTS_REGEX - else - DEFAULT_COMPONENTS_REGEX - end - - # Original MySQL UTF-8 Encoding Fixes: - # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/160 - # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/345 - sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true) - - sql = sql.gsub(regex, PLACEHOLDER) - return 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if CLEANUP_REGEX[adapter].match(sql) - - sql - rescue StandardError => e - OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e) - end - end - end -end diff --git a/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb new file mode 100644 index 0000000000..73e711c19b --- /dev/null +++ b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'opentelemetry-common' +require_relative 'sql_processor/obfuscator' + +module OpenTelemetry + module Helpers + # SQL processing utilities for OpenTelemetry instrumentation. + # + # This module provides a unified interface for SQL processing operations + # commonly needed in database adapter instrumentation, including SQL obfuscation. 
+ # + # @api public + module SqlProcessor + module_function + + # This is a SQL obfuscation utility intended for use in database adapter instrumentation. It uses the {Obfuscator} module. + # + # @param sql [String] The SQL to obfuscate. + # @param obfuscation_limit [optional Integer] the length at which the SQL string will not be obfuscated + # @param adapter [optional Symbol] the type of database adapter calling the method. `:default`, `:mysql`, `:postgres`, `:sqlite`, `:oracle`, `:cassandra` are supported. + # @return [String] The SQL query string where the values are replaced with "?". When the sql statement exceeds the obfuscation limit + # a message stating that the query was not obfuscated is returned instead. If quote characters remain after obfuscation, + # a string describing the failure is returned. + # + # @api public + def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default) + Obfuscator.obfuscate_sql(sql, obfuscation_limit: obfuscation_limit, adapter: adapter) + end + end + end +end diff --git a/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb new file mode 100644 index 0000000000..9021a727a6 --- /dev/null +++ b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb @@ -0,0 +1,126 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'opentelemetry-common' + +module OpenTelemetry + module Helpers + module SqlProcessor + # + # This module contains SQL obfuscation behavior to share with + # instrumentation for specific database adapters. 
+ # The class uses code from: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb + # + # To use this in your instrumentation, the `Instrumentation` class for + # your gem must contain configuration options for: + # * `:db_statement` + # Example: + # `option :db_statement, default: :include, validate: %I[omit include obfuscate]` + # * `:obfuscation_limit` + # Example: + # `option :obfuscation_limit, default: 2000, validate: :integer` + # + # If you want to add support for a new adapter, update the following + # constants to include keys for your adapter: + # * DIALECT_COMPONENTS + # * CLEANUP_REGEX + # You must also add a new constant that uses `generate_regex` with your + # adapter's dialect components that is named like + # `_COMPONENTS_REGEX`, such as: `MYSQL_COMPONENTS_REGEX`. + # + # @api public + module Obfuscator + module_function + + # From: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb + COMPONENTS_REGEX_MAP = { + single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/, + double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/, + dollar_quotes: /(\$(?!\d)[^$]*?\$).*?(?:\1|$)/, + uuids: /\{?(?:[0-9a-fA-F]\-*){32}\}?/, + numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/, + boolean_literals: /\b(?:true|false|null)\b/i, + hexadecimal_literals: /0x[0-9a-fA-F]+/, + comments: /(?:#|--).*?(?=\r|\n|$)/i, + multi_line_comments: %r{(?:\/\*.*?\*\/)}m, + oracle_quoted_strings: /q'\[.*?(?:\]'|$)|q'\{.*?(?:\}'|$)|q'\<.*?(?:\>'|$)|q'\(.*?(?:\)'|$)/ + }.freeze + + DIALECT_COMPONENTS = { + default: COMPONENTS_REGEX_MAP.keys, + mysql: %i[single_quotes double_quotes numeric_literals boolean_literals + hexadecimal_literals comments multi_line_comments], + postgres: %i[single_quotes dollar_quotes uuids numeric_literals + boolean_literals comments multi_line_comments], + sqlite: 
%i[single_quotes numeric_literals boolean_literals hexadecimal_literals + comments multi_line_comments], + oracle: %i[single_quotes oracle_quoted_strings numeric_literals comments + multi_line_comments], + cassandra: %i[single_quotes uuids numeric_literals boolean_literals + hexadecimal_literals comments multi_line_comments] + }.freeze + + PLACEHOLDER = '?' + + # We use these to check whether the query contains any quote characters + # after obfuscation. If so, that's a good indication that the original + # query was malformed, and so our obfuscation can't reliably find + # literals. In such a case, we'll replace the entire query with a + # placeholder. + CLEANUP_REGEX = { + default: %r{'|"|\/\*|\*\/}, + mysql: %r{'|"|\/\*|\*\//}, + postgres: %r{'|\/\*|\*\/|\$(?!\?)/}, + sqlite: %r{'|\/\*|\*\//}, + cassandra: %r{'|\/\*|\*\//}, + oracle: %r{'|\/\*|\*\//} + }.freeze + + # @api private + def generate_regex(dialect) + components = DIALECT_COMPONENTS[dialect] + Regexp.union(components.map { |component| COMPONENTS_REGEX_MAP[component] }) + end + + DEFAULT_COMPONENTS_REGEX = generate_regex(:default) + MYSQL_COMPONENTS_REGEX = generate_regex(:mysql) + POSTGRES_COMPONENTS_REGEX = generate_regex(:postgres) + SQLITE_COMPONENTS_REGEX = generate_regex(:sqlite) + CASSANDRA_COMPONENTS_REGEX = generate_regex(:cassandra) + ORACLE_COMPONENTS_REGEX = generate_regex(:oracle) + + # Internal implementation of SQL obfuscation. + # Use SqlProcessor.obfuscate_sql for the public API. 
+ # + # @api private + def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default) + return "SQL not obfuscated, query exceeds #{obfuscation_limit} characters" if sql.size > obfuscation_limit + + regex = case adapter + when :mysql + MYSQL_COMPONENTS_REGEX + when :postgres + POSTGRES_COMPONENTS_REGEX + else + DEFAULT_COMPONENTS_REGEX + end + + # Original MySQL UTF-8 Encoding Fixes: + # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/160 + # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/345 + sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true) + + sql = sql.gsub(regex, PLACEHOLDER) + return 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if CLEANUP_REGEX[adapter].match(sql) + + sql + rescue StandardError => e + OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e) + end + end + end + end +end From 8dbf5e9724c511cd282c003b88ac8500c9146c97 Mon Sep 17 00:00:00 2001 From: Hannah Ramadan Date: Thu, 9 Oct 2025 15:00:21 -0700 Subject: [PATCH 2/4] Update require --- helpers/sql-processor/lib/opentelemetry/helpers.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helpers/sql-processor/lib/opentelemetry/helpers.rb b/helpers/sql-processor/lib/opentelemetry/helpers.rb index 671d2b6e1a..36ee012067 100644 --- a/helpers/sql-processor/lib/opentelemetry/helpers.rb +++ b/helpers/sql-processor/lib/opentelemetry/helpers.rb @@ -4,7 +4,7 @@ # # SPDX-License-Identifier: Apache-2.0 -require 'opentelemetry/helpers/sql_obfuscation' +require 'opentelemetry/helpers/sql_processor' module OpenTelemetry # The helpers module contains functionality shared across multiple From e787f7cc88cc0f55d0b23ee872934247aa9f1594 Mon Sep 17 00:00:00 2001 From: Hannah Ramadan Date: Thu, 9 Oct 2025 15:02:52 -0700 Subject: [PATCH 3/4] Update tests --- .../test/helpers/sql_obfuscation_test.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/helpers/sql-processor/test/helpers/sql_obfuscation_test.rb b/helpers/sql-processor/test/helpers/sql_obfuscation_test.rb index 39229d47f1..4896d5494e 100644 --- a/helpers/sql-processor/test/helpers/sql_obfuscation_test.rb +++ b/helpers/sql-processor/test/helpers/sql_obfuscation_test.rb @@ -9,11 +9,11 @@ require_relative '../test_helper' -class SqlObfuscationTest < Minitest::Test +class SqlProcessorTest < Minitest::Test def test_named_arg_defaults_obfuscates sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com'" expected = 'SELECT * from users where users.id = ? and users.email = ?' - result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql) + result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql) assert_equal(expected, result) end @@ -21,7 +21,7 @@ def test_named_arg_defaults_obfuscates def test_obfuscation_returns_message_when_limit_is_reached sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com'" expected = 'SQL not obfuscated, query exceeds 42 characters' - result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, obfuscation_limit: 42) + result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, obfuscation_limit: 42) assert_equal(expected, result) end @@ -29,7 +29,7 @@ def test_obfuscation_returns_message_when_limit_is_reached def test_non_utf_8_encoded_string_obfuscates_with_mysql sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com\255'" expected = 'SELECT * from users where users.id = ? and users.email = ?' 
- result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :mysql) + result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, adapter: :mysql) assert_equal(expected, result) end @@ -37,7 +37,7 @@ def test_non_utf_8_encoded_string_obfuscates_with_mysql def test_non_utf_8_encoded_string_obfuscates_with_postgres sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com\255'" expected = 'SELECT * from users where users.id = ? and users.email = ?' - result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :postgres) + result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, adapter: :postgres) assert_equal(expected, result) end @@ -45,7 +45,7 @@ def test_non_utf_8_encoded_string_obfuscates_with_postgres def test_statement_with_emoji_encodes_utf_8_and_obfuscates sql = "SELECT * from users where users.id = 1 and users.email = 'test@😄.com'" expected = 'SELECT * from users where users.id = ? and users.email = ?' - result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql) + result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql) assert_equal(expected, result) end @@ -89,7 +89,7 @@ def self.load_fixture dialects.each do |dialect| define_method(:"test_sql_obfuscation_#{name}_#{dialect}") do - actual_obfuscated = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(query, adapter: dialect.to_sym) + actual_obfuscated = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(query, adapter: dialect.to_sym) message = build_failure_message(query, dialect, acceptable_outputs, actual_obfuscated) assert_includes(acceptable_outputs, actual_obfuscated, message) From aa94714e5bdf0ee2edfbd2b6283aba0c7e4364fa Mon Sep 17 00:00:00 2001 From: Hannah Ramadan Date: Thu, 9 Oct 2025 15:10:18 -0700 Subject: [PATCH 4/4] Rubocop: whitespace --- .../lib/opentelemetry/helpers/sql_processor.rb | 2 +- .../lib/opentelemetry/helpers/sql_processor/obfuscator.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) 
diff --git a/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb index 73e711c19b..b096a25a5b 100644 --- a/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb +++ b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor.rb @@ -13,7 +13,7 @@ module Helpers # # This module provides a unified interface for SQL processing operations # commonly needed in database adapter instrumentation, including SQL obfuscation. - # + # # @api public module SqlProcessor module_function diff --git a/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb index 9021a727a6..b985c1137f 100644 --- a/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb +++ b/helpers/sql-processor/lib/opentelemetry/helpers/sql_processor/obfuscator.rb @@ -54,11 +54,11 @@ module Obfuscator mysql: %i[single_quotes double_quotes numeric_literals boolean_literals hexadecimal_literals comments multi_line_comments], postgres: %i[single_quotes dollar_quotes uuids numeric_literals - boolean_literals comments multi_line_comments], + boolean_literals comments multi_line_comments], sqlite: %i[single_quotes numeric_literals boolean_literals hexadecimal_literals - comments multi_line_comments], + comments multi_line_comments], oracle: %i[single_quotes oracle_quoted_strings numeric_literals comments - multi_line_comments], + multi_line_comments], cassandra: %i[single_quotes uuids numeric_literals boolean_literals hexadecimal_literals comments multi_line_comments] }.freeze