Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion helpers/sql-processor/lib/opentelemetry/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# SPDX-License-Identifier: Apache-2.0

require 'opentelemetry/helpers/sql_obfuscation'
require 'opentelemetry/helpers/sql_processor'

module OpenTelemetry
# The helpers module contains functionality shared across multiple
Expand Down
130 changes: 0 additions & 130 deletions helpers/sql-processor/lib/opentelemetry/helpers/sql_obfuscation.rb

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# frozen_string_literal: true

# Copyright The OpenTelemetry Authors
#
# SPDX-License-Identifier: Apache-2.0

require 'opentelemetry-common'
require_relative 'sql_processor/obfuscator'

module OpenTelemetry
  module Helpers
    # Entry point for SQL processing helpers shared by database adapter
    # instrumentation. At present the only operation exposed is SQL
    # obfuscation, which is implemented by the {Obfuscator} module.
    #
    # @api public
    module SqlProcessor
      module_function

      # Replaces literal values in a SQL statement with "?" so the statement
      # can be recorded without leaking data. All work is delegated to
      # {Obfuscator.obfuscate_sql}; this method only forwards its arguments.
      #
      # @param sql [String] The SQL to obfuscate.
      # @param obfuscation_limit [optional Integer] the maximum statement length
      #   that will be obfuscated. Longer statements are not obfuscated; a
      #   message stating that the limit was exceeded is returned instead.
      # @param adapter [optional Symbol] the type of database adapter calling
      #   the method. `:default`, `:mysql`, `:postgres`, `:sqlite`, `:oracle`,
      #   `:cassandra` are supported.
      # @return [String] The SQL statement with its values replaced by "?", or
      #   a descriptive message when the statement exceeds the obfuscation
      #   limit or when quote characters remain after obfuscation.
      #
      # @api public
      def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default)
        forwarded_options = { obfuscation_limit: obfuscation_limit, adapter: adapter }
        Obfuscator.obfuscate_sql(sql, **forwarded_options)
      end
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# frozen_string_literal: true

# Copyright The OpenTelemetry Authors
#
# SPDX-License-Identifier: Apache-2.0

require 'opentelemetry-common'

module OpenTelemetry
  module Helpers
    module SqlProcessor
      #
      # This module contains SQL obfuscation behavior to share with
      # instrumentation for specific database adapters.
      # This module uses code from: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb
      #
      # To use this in your instrumentation, the `Instrumentation` class for
      # your gem must contain configuration options for:
      #  * `:db_statement`
      #    Example:
      #    `option :db_statement, default: :include, validate: %I[omit include obfuscate]`
      #  * `:obfuscation_limit`
      #    Example:
      #    `option :obfuscation_limit, default: 2000, validate: :integer`
      #
      # If you want to add support for a new adapter, update the following
      # constants to include keys for your adapter:
      #  * DIALECT_COMPONENTS
      #  * CLEANUP_REGEX
      # You must also add a new constant that uses `generate_regex` with your
      # adapter's dialect components that is named like
      # `<ADAPTER>_COMPONENTS_REGEX`, such as: `MYSQL_COMPONENTS_REGEX`.
      #
      # @api public
      module Obfuscator
        module_function

        # Patterns for each kind of SQL fragment that may be replaced by the
        # placeholder, keyed by component name.
        # From: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb
        COMPONENTS_REGEX_MAP = {
          single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/,
          double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/,
          dollar_quotes: /(\$(?!\d)[^$]*?\$).*?(?:\1|$)/,
          uuids: /\{?(?:[0-9a-fA-F]\-*){32}\}?/,
          numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/,
          boolean_literals: /\b(?:true|false|null)\b/i,
          hexadecimal_literals: /0x[0-9a-fA-F]+/,
          comments: /(?:#|--).*?(?=\r|\n|$)/i,
          multi_line_comments: %r{(?:\/\*.*?\*\/)}m,
          oracle_quoted_strings: /q'\[.*?(?:\]'|$)|q'\{.*?(?:\}'|$)|q'\<.*?(?:\>'|$)|q'\(.*?(?:\)'|$)/
        }.freeze

        # The component names (keys of COMPONENTS_REGEX_MAP) that apply to each
        # dialect. `:default` uses every component.
        DIALECT_COMPONENTS = {
          default: COMPONENTS_REGEX_MAP.keys,
          mysql: %i[single_quotes double_quotes numeric_literals boolean_literals
                    hexadecimal_literals comments multi_line_comments],
          postgres: %i[single_quotes dollar_quotes uuids numeric_literals
                       boolean_literals comments multi_line_comments],
          sqlite: %i[single_quotes numeric_literals boolean_literals hexadecimal_literals
                     comments multi_line_comments],
          oracle: %i[single_quotes oracle_quoted_strings numeric_literals comments
                     multi_line_comments],
          cassandra: %i[single_quotes uuids numeric_literals boolean_literals
                        hexadecimal_literals comments multi_line_comments]
        }.freeze

        # The string substituted for every matched component.
        PLACEHOLDER = '?'

        # We use these to check whether the query contains any quote characters
        # after obfuscation. If so, that's a good indication that the original
        # query was malformed, and so our obfuscation can't reliably find
        # literals. In such a case, we'll replace the entire query with a
        # placeholder.
        #
        # Inside `%r{...}` a trailing `/` is a literal character, not a
        # delimiter, so each pattern must end right after its last alternative;
        # otherwise a dangling `*/` (or `$` for postgres) goes undetected.
        CLEANUP_REGEX = {
          default: %r{'|"|\/\*|\*\/},
          mysql: %r{'|"|\/\*|\*\/},
          postgres: %r{'|\/\*|\*\/|\$(?!\?)},
          sqlite: %r{'|\/\*|\*\/},
          cassandra: %r{'|\/\*|\*\/},
          oracle: %r{'|\/\*|\*\/}
        }.freeze

        # Builds a single regular expression matching any of the components
        # listed for the given dialect in DIALECT_COMPONENTS.
        #
        # @param dialect [Symbol] a key of DIALECT_COMPONENTS
        # @return [Regexp] the union of the dialect's component patterns
        #
        # @api private
        def generate_regex(dialect)
          Regexp.union(COMPONENTS_REGEX_MAP.values_at(*DIALECT_COMPONENTS[dialect]))
        end

        DEFAULT_COMPONENTS_REGEX = generate_regex(:default)
        MYSQL_COMPONENTS_REGEX = generate_regex(:mysql)
        POSTGRES_COMPONENTS_REGEX = generate_regex(:postgres)
        SQLITE_COMPONENTS_REGEX = generate_regex(:sqlite)
        CASSANDRA_COMPONENTS_REGEX = generate_regex(:cassandra)
        ORACLE_COMPONENTS_REGEX = generate_regex(:oracle)

        # Internal implementation of SQL obfuscation.
        # Use SqlProcessor.obfuscate_sql for the public API.
        #
        # @param sql [String] The SQL to obfuscate.
        # @param obfuscation_limit [optional Integer] statements longer than
        #   this are not obfuscated; a message is returned instead.
        # @param adapter [optional Symbol] the calling database adapter.
        #   Adapters without an entry in CLEANUP_REGEX are treated as `:default`.
        # @return [String] the obfuscated SQL, or a message explaining why the
        #   statement was not obfuscated. If a StandardError is raised, the
        #   result of `OpenTelemetry.handle_error` is returned instead.
        #
        # @api private
        def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default)
          return "SQL not obfuscated, query exceeds #{obfuscation_limit} characters" if sql.size > obfuscation_limit

          # NOTE(review): only :mysql and :postgres select a dialect-specific
          # regex; :sqlite, :oracle and :cassandra use the default (superset)
          # regex even though dedicated constants exist above. Kept as-is to
          # avoid changing obfuscation output - confirm whether intentional.
          regex = case adapter
                  when :mysql
                    MYSQL_COMPONENTS_REGEX
                  when :postgres
                    POSTGRES_COMPONENTS_REGEX
                  else
                    DEFAULT_COMPONENTS_REGEX
                  end

          # Original MySQL UTF-8 Encoding Fixes:
          # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/160
          # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/345
          sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true)

          sql = sql.gsub(regex, PLACEHOLDER)

          # Fall back to the default cleanup pattern for unrecognised adapters,
          # mirroring the component-regex fallback above, rather than calling
          # a method on nil.
          cleanup_regex = CLEANUP_REGEX.fetch(adapter, CLEANUP_REGEX[:default])
          return 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if cleanup_regex.match?(sql)

          sql
        rescue StandardError => e
          OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e)
        end
      end
    end
  end
end
14 changes: 7 additions & 7 deletions helpers/sql-processor/test/helpers/sql_obfuscation_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,43 @@

require_relative '../test_helper'

class SqlObfuscationTest < Minitest::Test
class SqlProcessorTest < Minitest::Test
def test_named_arg_defaults_obfuscates
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]'"
expected = 'SELECT * from users where users.id = ? and users.email = ?'
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql)
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql)

assert_equal(expected, result)
end

def test_obfuscation_returns_message_when_limit_is_reached
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]'"
expected = 'SQL not obfuscated, query exceeds 42 characters'
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, obfuscation_limit: 42)
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, obfuscation_limit: 42)

assert_equal(expected, result)
end

def test_non_utf_8_encoded_string_obfuscates_with_mysql
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]\255'"
expected = 'SELECT * from users where users.id = ? and users.email = ?'
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :mysql)
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, adapter: :mysql)

assert_equal(expected, result)
end

def test_non_utf_8_encoded_string_obfuscates_with_postgres
sql = "SELECT * from users where users.id = 1 and users.email = '[email protected]\255'"
expected = 'SELECT * from users where users.id = ? and users.email = ?'
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql, adapter: :postgres)
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql, adapter: :postgres)

assert_equal(expected, result)
end

def test_statement_with_emoji_encodes_utf_8_and_obfuscates
sql = "SELECT * from users where users.id = 1 and users.email = 'test@😄.com'"
expected = 'SELECT * from users where users.id = ? and users.email = ?'
result = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(sql)
result = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(sql)

assert_equal(expected, result)
end
Expand Down Expand Up @@ -89,7 +89,7 @@ def self.load_fixture

dialects.each do |dialect|
define_method(:"test_sql_obfuscation_#{name}_#{dialect}") do
actual_obfuscated = OpenTelemetry::Helpers::SqlObfuscation.obfuscate_sql(query, adapter: dialect.to_sym)
actual_obfuscated = OpenTelemetry::Helpers::SqlProcessor.obfuscate_sql(query, adapter: dialect.to_sym)
message = build_failure_message(query, dialect, acceptable_outputs, actual_obfuscated)

assert_includes(acceptable_outputs, actual_obfuscated, message)
Expand Down