|
| 1 | +# frozen_string_literal: true |
| 2 | + |
| 3 | +# Copyright The OpenTelemetry Authors |
| 4 | +# |
| 5 | +# SPDX-License-Identifier: Apache-2.0 |
| 6 | + |
| 7 | +require 'opentelemetry-common' |
| 8 | + |
module OpenTelemetry
  module Helpers
    module SqlProcessor
      #
      # This module contains SQL obfuscation behavior to share with
      # instrumentation for specific database adapters.
      # The class uses code from: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb
      #
      # To use this in your instrumentation, the `Instrumentation` class for
      # your gem must contain configuration options for:
      #  * `:db_statement`
      #    Example:
      #    `option :db_statement, default: :include, validate: %I[omit include obfuscate]`
      #  * `:obfuscation_limit`
      #    Example:
      #    `option :obfuscation_limit, default: 2000, validate: :integer`
      #
      # If you want to add support for a new adapter, update the following
      # constants to include keys for your adapter:
      #  * DIALECT_COMPONENTS
      #  * CLEANUP_REGEX
      #  * COMPONENTS_REGEX_BY_ADAPTER
      # You must also add a new constant that uses `generate_regex` with your
      # adapter's dialect components that is named like
      # `<ADAPTER>_COMPONENTS_REGEX`, such as: `MYSQL_COMPONENTS_REGEX`, and
      # register it in COMPONENTS_REGEX_BY_ADAPTER.
      #
      # @api public
      module Obfuscator
        module_function

        # Building blocks: one regex per kind of literal/comment that may be
        # replaced by PLACEHOLDER.
        # From: https://github.com/newrelic/newrelic-ruby-agent/blob/1fca78cc7a087421ad58088d8bea72c0362bc62f/lib/new_relic/agent/database/obfuscation_helpers.rb
        COMPONENTS_REGEX_MAP = {
          single_quotes: /'(?:[^']|'')*?(?:\\'.*|'(?!'))/,
          double_quotes: /"(?:[^"]|"")*?(?:\\".*|"(?!"))/,
          dollar_quotes: /(\$(?!\d)[^$]*?\$).*?(?:\1|$)/,
          uuids: /\{?(?:[0-9a-fA-F]\-*){32}\}?/,
          numeric_literals: /-?\b(?:[0-9]+\.)?[0-9]+([eE][+-]?[0-9]+)?\b/,
          boolean_literals: /\b(?:true|false|null)\b/i,
          hexadecimal_literals: /0x[0-9a-fA-F]+/,
          comments: /(?:#|--).*?(?=\r|\n|$)/i,
          multi_line_comments: %r{(?:\/\*.*?\*\/)}m,
          oracle_quoted_strings: /q'\[.*?(?:\]'|$)|q'\{.*?(?:\}'|$)|q'\<.*?(?:\>'|$)|q'\(.*?(?:\)'|$)/
        }.freeze

        # Which COMPONENTS_REGEX_MAP entries apply to each SQL dialect.
        DIALECT_COMPONENTS = {
          default: COMPONENTS_REGEX_MAP.keys,
          mysql: %i[single_quotes double_quotes numeric_literals boolean_literals
                    hexadecimal_literals comments multi_line_comments],
          postgres: %i[single_quotes dollar_quotes uuids numeric_literals
                       boolean_literals comments multi_line_comments],
          sqlite: %i[single_quotes numeric_literals boolean_literals hexadecimal_literals
                     comments multi_line_comments],
          oracle: %i[single_quotes oracle_quoted_strings numeric_literals comments
                     multi_line_comments],
          cassandra: %i[single_quotes uuids numeric_literals boolean_literals
                        hexadecimal_literals comments multi_line_comments]
        }.freeze

        # Text substituted for every matched literal or comment.
        PLACEHOLDER = '?'

        # We use these to check whether the query contains any quote characters
        # after obfuscation. If so, that's a good indication that the original
        # query was malformed, and so our obfuscation can't reliably find
        # literals. In such a case, we'll replace the entire query with a
        # placeholder.
        #
        # NOTE: these are `%r{...}` literals, so the closing delimiter is `}`.
        # A trailing `/` inside the braces would be matched literally and would
        # stop the pattern from detecting a bare `*/` (or `$` for postgres).
        CLEANUP_REGEX = {
          default: %r{'|"|\/\*|\*\/},
          mysql: %r{'|"|\/\*|\*\/},
          postgres: %r{'|\/\*|\*\/|\$(?!\?)},
          sqlite: %r{'|\/\*|\*\/},
          cassandra: %r{'|\/\*|\*\/},
          oracle: %r{'|\/\*|\*\/}
        }.freeze

        # Builds the combined literal-matching regex for one dialect by
        # unioning the component regexes listed in DIALECT_COMPONENTS.
        #
        # @param dialect [Symbol] a key of DIALECT_COMPONENTS
        # @return [Regexp] union of the dialect's component regexes
        # @api private
        def generate_regex(dialect)
          components = DIALECT_COMPONENTS[dialect]
          Regexp.union(components.map { |component| COMPONENTS_REGEX_MAP[component] })
        end

        DEFAULT_COMPONENTS_REGEX = generate_regex(:default)
        MYSQL_COMPONENTS_REGEX = generate_regex(:mysql)
        POSTGRES_COMPONENTS_REGEX = generate_regex(:postgres)
        SQLITE_COMPONENTS_REGEX = generate_regex(:sqlite)
        CASSANDRA_COMPONENTS_REGEX = generate_regex(:cassandra)
        ORACLE_COMPONENTS_REGEX = generate_regex(:oracle)

        # Adapter => precompiled literal-matching regex. Every dialect declared
        # in DIALECT_COMPONENTS is registered here so that `obfuscate_sql`
        # pairs each adapter's literal regex with its matching CLEANUP_REGEX.
        COMPONENTS_REGEX_BY_ADAPTER = {
          default: DEFAULT_COMPONENTS_REGEX,
          mysql: MYSQL_COMPONENTS_REGEX,
          postgres: POSTGRES_COMPONENTS_REGEX,
          sqlite: SQLITE_COMPONENTS_REGEX,
          cassandra: CASSANDRA_COMPONENTS_REGEX,
          oracle: ORACLE_COMPONENTS_REGEX
        }.freeze

        # Internal implementation of SQL obfuscation.
        # Use SqlProcessor.obfuscate_sql for the public API.
        #
        # Replaces every literal/comment recognised for the adapter's dialect
        # with PLACEHOLDER, then verifies that no quote, comment-delimiter (or,
        # for postgres, stray `$`) characters are left over.
        #
        # @param sql [String] the SQL statement to obfuscate
        # @param obfuscation_limit [Integer] statements longer than this
        #   (by `String#size`) are not obfuscated; a fixed message is returned
        # @param adapter [Symbol] dialect key; adapters without a registered
        #   dialect are treated as `:default`
        # @return [String] the obfuscated SQL, or an explanatory message when
        #   the statement is too long or could not be obfuscated reliably.
        #   If an exception is raised, the return value is whatever
        #   `OpenTelemetry.handle_error` returns.
        # @api private
        def obfuscate_sql(sql, obfuscation_limit: 2000, adapter: :default)
          return "SQL not obfuscated, query exceeds #{obfuscation_limit} characters" if sql.size > obfuscation_limit

          # Unknown adapters fall back to the default dialect for BOTH lookups,
          # so the literal regex and the cleanup regex always agree.
          dialect = COMPONENTS_REGEX_BY_ADAPTER.key?(adapter) ? adapter : :default
          components_regex = COMPONENTS_REGEX_BY_ADAPTER.fetch(dialect)
          cleanup_regex = CLEANUP_REGEX.fetch(dialect, CLEANUP_REGEX[:default])

          # Original MySQL UTF-8 Encoding Fixes:
          # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/160
          # https://github.com/open-telemetry/opentelemetry-ruby-contrib/pull/345
          sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true)

          sql = sql.gsub(components_regex, PLACEHOLDER)
          return 'Failed to obfuscate SQL query - quote characters remained after obfuscation' if cleanup_regex.match?(sql)

          sql
        rescue StandardError => e
          OpenTelemetry.handle_error(message: 'Failed to obfuscate SQL', exception: e)
        end
      end
    end
  end
end
0 commit comments