From ce301b05580f59685cf00bdb46eb763481bc8824 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 3 Dec 2020 11:41:36 -0700 Subject: [PATCH 1/9] Align sanitize_field_names with cross-agent spec --- docs/configuration.asciidoc | 30 ++++---- elasticapm/conf/__init__.py | 4 +- elasticapm/conf/constants.py | 29 ++++++-- elasticapm/processors.py | 6 +- tests/processors/tests.py | 129 +++++++++++++++++------------------ 5 files changed, 109 insertions(+), 89 deletions(-) diff --git a/docs/configuration.asciidoc b/docs/configuration.asciidoc index 9ea552bdf..a9298e047 100644 --- a/docs/configuration.asciidoc +++ b/docs/configuration.asciidoc @@ -707,23 +707,29 @@ WARNING: We recommend always including the default set of validators if you cust [[config-sanitize-field-names]] ==== `sanitize_field_names` +<> + [options="header"] |============ | Environment | Django/Flask | Default -| `ELASTIC_APM_SANITIZE_FIELD_NAMES` | `SANITIZE_FIELD_NAMES` | `['authorization', - 'password', - 'secret', - 'passwd', - 'token', - 'api_key', - 'access_token', - 'sessionid']` -|============ - -A list of field names to mask when using processors. +| `ELASTIC_APM_SANITIZE_FIELD_NAMES` | `SANITIZE_FIELD_NAMES` | `["password", + "passwd", + "pwd", + "secret", + "*key", + "*token*", + "*session*", + "*credit*", + "*card*", + "authorization", + "set-cookie"]` +|============ + +A list of glob-matched field names to match and mask when using processors. For more information, see <>. -WARNING: We recommend always including the default set of field names if you customize this setting. +WARNING: We recommend always including the default set of field name matches +if you customize this setting. [float] diff --git a/elasticapm/conf/__init__.py b/elasticapm/conf/__init__.py index b27884d6a..2341d73f3 100644 --- a/elasticapm/conf/__init__.py +++ b/elasticapm/conf/__init__.py @@ -507,7 +507,9 @@ class Config(_ConfigBase): "elasticapm.processors.sanitize_http_request_body", ], ) - sanitize_field_names = _ListConfigValue("SANITIZE_FIELD_NAMES", default=BASE_SANITIZE_FIELD_NAMES) + sanitize_field_names = _ListConfigValue( + "SANITIZE_FIELD_NAMES", type=starmatch_to_regex, default=BASE_SANITIZE_FIELD_NAMES + ) metrics_sets = _ListConfigValue( "METRICS_SETS", default=[ diff --git a/elasticapm/conf/constants.py b/elasticapm/conf/constants.py index ac533392b..101d136a8 100644 --- a/elasticapm/conf/constants.py +++ b/elasticapm/conf/constants.py @@ -60,14 +60,31 @@ HARDCODED_PROCESSORS = ["elasticapm.processors.add_context_lines_to_frames"] BASE_SANITIZE_FIELD_NAMES = [ - "authorization", + re.compile("(?:password)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:passwd)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:pwd)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:secret)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:.*key)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:.*token.*)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:.*session.*)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:.*credit.*)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:.*card.*)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:authorization)\\Z", re.IGNORECASE | re.DOTALL), + re.compile("(?:set\\-cookie)\\Z", re.IGNORECASE | re.DOTALL), +] + +BASE_SANITIZE_FIELD_NAMES_UNPROCESSED = [ "password", - "secret", "passwd", - "token", - "api_key", - "access_token", - "sessionid", + "pwd", + "secret", + "*key", + "*token*", + "*session*", + "*credit*", + "*card*", + "authorization", + "set-cookie", ] OUTCOME = namedtuple("OUTCOME", ["SUCCESS", "FAILURE", "UNKNOWN"])( diff --git a/elasticapm/processors.py b/elasticapm/processors.py index e241b7977..fbb77878e 100644 --- a/elasticapm/processors.py +++ b/elasticapm/processors.py @@ -276,9 +276,9 @@ def mark_in_app_frames(client, event): def _sanitize(key, value, **kwargs): if "sanitize_field_names" in kwargs: - sanitize_field_names = frozenset(kwargs["sanitize_field_names"]) + sanitize_field_names = kwargs["sanitize_field_names"] else: - sanitize_field_names = frozenset(BASE_SANITIZE_FIELD_NAMES) + sanitize_field_names = BASE_SANITIZE_FIELD_NAMES if value is None: return @@ -296,7 +296,7 @@ def _sanitize(key, value, **kwargs): key = key.lower() for field in sanitize_field_names: - if field in key: + if field.match(key.strip()): # store mask as a fixed length for security return MASK return value diff --git a/tests/processors/tests.py b/tests/processors/tests.py index 9500d9aed..34fc25cdb 100644 --- a/tests/processors/tests.py +++ b/tests/processors/tests.py @@ -40,7 +40,7 @@ import elasticapm from elasticapm import Client, processors -from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES, ERROR, SPAN, TRANSACTION +from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES_UNPROCESSED, ERROR, SPAN, TRANSACTION from elasticapm.utils import compat @@ -49,33 +49,25 @@ def http_test_data(): return { "context": { "request": { - "body": "foo=bar&password=123456&the_secret=abc&cc=1234567890098765&custom_field=123", - "env": { - "foo": "bar", - "password": "hello", - "the_secret": "hello", - "a_password_here": "hello", - "custom_env": "bye", - }, + "body": "foo=bar&password=123456&secret=abc&cc=1234567890098765&custom_field=123", + "env": {"foo": "bar", "password": "hello", "secret": "hello", "custom_env": "bye",}, "headers": { "foo": "bar", "password": "hello", - "the_secret": "hello", - "a_password_here": "hello", + "secret": "hello", "authorization": "bearer xyz", "some-header": "some-secret-value", }, "cookies": { "foo": "bar", "password": "topsecret", - "the_secret": "topsecret", + "secret": "topsecret", "sessionid": "123", - "a_password_here": "123456", "custom-sensitive-cookie": "123", }, "url": { - "full": "http://example.com/bla?foo=bar&password=123456&the_secret=abc&cc=1234567890098765&custom_field=123", - "search": "foo=bar&password=123456&the_secret=abc&cc=1234567890098765&custom_field=123", + "full": "http://example.com/bla?foo=bar&password=123456&secret=abc&cc=1234567890098765&custom_field=123", + "search": "foo=bar&password=123456&secret=abc&cc=1234567890098765&custom_field=123", }, }, "response": { @@ -83,8 +75,7 @@ def http_test_data(): "headers": { "foo": "bar", "password": "hello", - "the_secret": "hello", - "a_password_here": "hello", + "secret": "hello", "authorization": "bearer xyz", "some-header": "some-secret-value", }, @@ -96,7 +87,10 @@ def http_test_data(): @pytest.mark.parametrize( "elasticapm_client, custom_field", [ - ({"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["sensitive-stacktrace-val"]}, processors.MASK), + ( + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["sensitive-stacktrace-val"]}, + processors.MASK, + ), ({}, "bye"), ], indirect=["elasticapm_client"], @@ -105,15 +99,7 @@ def test_stacktrace(elasticapm_client, custom_field): data = { "exception": { "stacktrace": [ - { - "vars": { - "foo": "bar", - "password": "hello", - "the_secret": "hello", - "a_password_here": "hello", - "sensitive-stacktrace-val": "bye", - } - } + {"vars": {"foo": "bar", "password": "hello", "secret": "hello", "sensitive-stacktrace-val": "bye",}} ], "cause": [ { @@ -122,8 +108,7 @@ def test_stacktrace(elasticapm_client, custom_field): "vars": { "foo": "bar", "password": "hello", - "the_secret": "hello", - "a_password_here": "hello", + "secret": "hello", "sensitive-stacktrace-val": "bye", } } @@ -135,8 +120,7 @@ def test_stacktrace(elasticapm_client, custom_field): "vars": { "foo": "bar", "password": "hello", - "the_secret": "hello", - "a_password_here": "hello", + "secret": "hello", "sensitive-stacktrace-val": "bye", } } @@ -164,10 +148,8 @@ def test_stacktrace(elasticapm_client, custom_field): assert vars["foo"] == "bar" assert "password" in vars assert vars["password"] == processors.MASK - assert "the_secret" in vars - assert vars["the_secret"] == processors.MASK - assert "a_password_here" in vars - assert vars["a_password_here"] == processors.MASK + assert "secret" in vars + assert vars["secret"] == processors.MASK assert "sensitive-stacktrace-val" in vars assert vars["sensitive-stacktrace-val"] == custom_field @@ -184,14 +166,24 @@ def test_remove_http_request_body(http_test_data): "elasticapm_client, custom_field, expected_header_cookies", [ ( - {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["custom-sensitive-cookie"]}, + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom-sensitive-cookie"]}, {"custom-sensitive-cookie": processors.MASK}, - "foo=bar; password={0}; the_secret={0}; csrftoken={0}; custom-sensitive-cookie={0}".format(processors.MASK), + "foo=bar; password={0}; secret={0}; csrftoken={0}; custom-sensitive-cookie={0}".format(processors.MASK), ), ( {}, {"custom-sensitive-cookie": "123"}, - "foo=bar; password={0}; the_secret={0}; csrftoken={0}; custom-sensitive-cookie=123".format(processors.MASK), + "foo=bar; password={0}; secret={0}; csrftoken={0}; custom-sensitive-cookie=123".format(processors.MASK), + ), + ( + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom-sensitive-*"]}, + {"custom-sensitive-cookie": processors.MASK}, + "foo=bar; password={0}; secret={0}; csrftoken={0}; custom-sensitive-cookie={0}".format(processors.MASK), + ), + ( + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom-sensitive"]}, + {"custom-sensitive-cookie": "123"}, + "foo=bar; password={0}; secret={0}; csrftoken={0}; custom-sensitive-cookie=123".format(processors.MASK), ), ], indirect=["elasticapm_client"], @@ -199,15 +191,14 @@ def test_remove_http_request_body(http_test_data): def test_sanitize_http_request_cookies(elasticapm_client, custom_field, expected_header_cookies, http_test_data): http_test_data["context"]["request"]["headers"][ "cookie" - ] = "foo=bar; password=12345; the_secret=12345; csrftoken=abc; custom-sensitive-cookie=123" + ] = "foo=bar; password=12345; secret=12345; csrftoken=abc; custom-sensitive-cookie=123" result = processors.sanitize_http_request_cookies(elasticapm_client, http_test_data) expected = { "foo": "bar", "password": processors.MASK, - "the_secret": processors.MASK, + "secret": processors.MASK, "sessionid": processors.MASK, - "a_password_here": processors.MASK, } expected.update(custom_field) @@ -221,7 +212,7 @@ def test_sanitize_http_request_cookies(elasticapm_client, custom_field, expected "elasticapm_client, expected_cookies", [ ( - {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["custom-sensitive-cookie"]}, + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom-sensitive-cookie"]}, "foo=bar; httponly; secure ; sessionid={0}; httponly; secure; custom-sensitive-cookie={0}".format( processors.MASK ), @@ -248,10 +239,19 @@ def test_sanitize_http_response_cookies(elasticapm_client, expected_cookies, htt @pytest.mark.parametrize( "elasticapm_client, custom_header", [ - ({"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["some-header"]}, {"some-header": processors.MASK}), - ({"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["some-"]}, {"some-header": processors.MASK}), - ({"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["other-val"]}, {"some-header": "some-secret-value"}), - ({"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["-"]}, {"some-header": processors.MASK}), + ( + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["some-header"]}, + {"some-header": processors.MASK}, + ), + ( + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["some-*"]}, + {"some-header": processors.MASK}, + ), + ( + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["other-val"]}, + {"some-header": "some-secret-value"}, + ), + ({"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["*-*"]}, {"some-header": processors.MASK}), ], indirect=["elasticapm_client"], ) @@ -260,8 +260,7 @@ def test_sanitize_http_headers(elasticapm_client, custom_header, http_test_data) expected = { "foo": "bar", "password": processors.MASK, - "the_secret": processors.MASK, - "a_password_here": processors.MASK, + "secret": processors.MASK, "authorization": processors.MASK, } expected.update(custom_header) @@ -272,7 +271,10 @@ def test_sanitize_http_headers(elasticapm_client, custom_header, http_test_data) @pytest.mark.parametrize( "elasticapm_client, custom_env", [ - ({"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["custom_env"]}, {"custom_env": processors.MASK}), + ( + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_env"]}, + {"custom_env": processors.MASK}, + ), ({}, {"custom_env": "bye"}), ], indirect=["elasticapm_client"], @@ -282,8 +284,7 @@ def test_sanitize_http_wgi_env(elasticapm_client, custom_env, http_test_data): expected = { "foo": "bar", "password": processors.MASK, - "the_secret": processors.MASK, - "a_password_here": processors.MASK, + "secret": processors.MASK, } expected.update(custom_env) assert result["context"]["request"]["env"] == expected @@ -293,10 +294,10 @@ def test_sanitize_http_wgi_env(elasticapm_client, custom_env, http_test_data): "elasticapm_client, expected", [ ( - {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["custom_field"]}, - "foo=bar&password={0}&the_secret={0}&cc={0}&custom_field={0}".format(processors.MASK), + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, + "foo=bar&password={0}&secret={0}&cc={0}&custom_field={0}".format(processors.MASK), ), - ({}, "foo=bar&password={0}&the_secret={0}&cc={0}&custom_field=123".format(processors.MASK)), + ({}, "foo=bar&password={0}&secret={0}&cc={0}&custom_field=123".format(processors.MASK)), ], indirect=["elasticapm_client"], ) @@ -320,10 +321,10 @@ def test_sanitize_http_query_string_max_length(elasticapm_client): "elasticapm_client, expected", [ ( - {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["custom_field"]}, - "foo=bar&password={0}&the_secret={0}&cc={0}&custom_field={0}".format(processors.MASK), + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, + "foo=bar&password={0}&secret={0}&cc={0}&custom_field={0}".format(processors.MASK), ), - ({}, "foo=bar&password={0}&the_secret={0}&cc={0}&custom_field=123".format(processors.MASK)), + ({}, "foo=bar&password={0}&secret={0}&cc={0}&custom_field=123".format(processors.MASK)), ], indirect=["elasticapm_client"], ) @@ -336,10 +337,10 @@ def test_post_as_string(elasticapm_client, expected, http_test_data): "elasticapm_client, expected", [ ( - {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES + ["custom_field"]}, - "foo=bar&password={0}&the_secret={0}&cc={0}&custom_field={0}".format(processors.MASK), + {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, + "foo=bar&password={0}&secret={0}&cc={0}&custom_field={0}".format(processors.MASK), ), - ({}, "foo=bar&password={0}&the_secret={0}&cc={0}&custom_field=123".format(processors.MASK)), + ({}, "foo=bar&password={0}&secret={0}&cc={0}&custom_field=123".format(processors.MASK)), ], indirect=["elasticapm_client"], ) @@ -371,13 +372,7 @@ def test_non_utf8_encoding(elasticapm_client, http_test_data): def test_remove_stacktrace_locals(): - data = { - "exception": { - "stacktrace": [ - {"vars": {"foo": "bar", "password": "hello", "the_secret": "hello", "a_password_here": "hello"}} - ] - } - } + data = {"exception": {"stacktrace": [{"vars": {"foo": "bar", "password": "hello", "secret": "hello"}}]}} result = processors.remove_stacktrace_locals(None, data) assert "stacktrace" in result["exception"] From 8648dc8e5fbc83400949467e7d22da1074fe4340 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 3 Dec 2020 11:55:35 -0700 Subject: [PATCH 2/9] Add CHANGELOG --- CHANGELOG.asciidoc | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index aeb447c70..198e49a57 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -30,6 +30,25 @@ endif::[] //===== Bug fixes // +[float] +=== Unreleased + +// Unreleased changes go here +// When the next release happens, nest these changes under the "Python Agent version 5.x" heading +[float] +===== Breaking changes + +* Align `sanitize_field_names` config with the + https://github.com/elastic/apm/blob/3fa78e2a1eeea81c73c2e16e96dbf6b2e79f3c64/specs/agents/sanitization.md[cross-agent spec]. + If you are using a non-default `sanitize_field_names`, surrounding each of your entries with stars (i.e. `\*secret\*`) will retain the old behavior. {pull}982[#982] + +[float] +===== Features + +[float] +===== Bug fixes + + [[release-notes-5.x]] === Python Agent version 5.x @@ -45,7 +64,7 @@ endif::[] * Implement `transaction_ignore_urls` config (supports central config) {pull}923[#923] * Add public API to retrieve trace parent header {pull}956[#956] * Added support for cgroup memory metrics {pull}846[#846] - + [float] ===== Bug fixes From f356d6c8ece15dc6291b963d0713284c4bc50caf Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 8 Dec 2020 14:17:07 -0700 Subject: [PATCH 3/9] Remove value sanitization The value of the credit card check was dubious, and this change will improve performance. --- elasticapm/processors.py | 6 ------ tests/processors/tests.py | 22 ++++++---------------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/elasticapm/processors.py b/elasticapm/processors.py index fbb77878e..c6d09cd07 100644 --- a/elasticapm/processors.py +++ b/elasticapm/processors.py @@ -29,7 +29,6 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -import re import warnings from collections import defaultdict @@ -38,8 +37,6 @@ from elasticapm.utils.encoding import force_text, keyword_field from elasticapm.utils.stacks import get_lines_from_file -SANITIZE_VALUE_PATTERNS = [re.compile(r"^[- \d]{16,19}$")] # credit card numbers, with or without spacers - def for_events(*events): """ @@ -283,9 +280,6 @@ def _sanitize(key, value, **kwargs): if value is None: return - if isinstance(value, compat.string_types) and any(pattern.match(value) for pattern in SANITIZE_VALUE_PATTERNS): - return MASK - if isinstance(value, dict): # varmap will call _sanitize on each k:v pair of the dict, so we don't # have to do anything with dicts here diff --git a/tests/processors/tests.py b/tests/processors/tests.py index 34fc25cdb..9e1d2cfe6 100644 --- a/tests/processors/tests.py +++ b/tests/processors/tests.py @@ -295,9 +295,9 @@ def test_sanitize_http_wgi_env(elasticapm_client, custom_env, http_test_data): [ ( {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, - "foo=bar&password={0}&secret={0}&cc={0}&custom_field={0}".format(processors.MASK), + "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field={0}".format(processors.MASK), ), - ({}, "foo=bar&password={0}&secret={0}&cc={0}&custom_field=123".format(processors.MASK)), + ({}, "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field=123".format(processors.MASK)), ], indirect=["elasticapm_client"], ) @@ -322,9 +322,9 @@ def test_sanitize_http_query_string_max_length(elasticapm_client): [ ( {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, - "foo=bar&password={0}&secret={0}&cc={0}&custom_field={0}".format(processors.MASK), + "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field={0}".format(processors.MASK), ), - ({}, "foo=bar&password={0}&secret={0}&cc={0}&custom_field=123".format(processors.MASK)), + ({}, "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field=123".format(processors.MASK)), ], indirect=["elasticapm_client"], ) @@ -338,9 +338,9 @@ def test_post_as_string(elasticapm_client, expected, http_test_data): [ ( {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, - "foo=bar&password={0}&secret={0}&cc={0}&custom_field={0}".format(processors.MASK), + "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field={0}".format(processors.MASK), ), - ({}, "foo=bar&password={0}&secret={0}&cc={0}&custom_field=123".format(processors.MASK)), + ({}, "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field=123".format(processors.MASK)), ], indirect=["elasticapm_client"], ) @@ -349,16 +349,6 @@ def test_querystring_as_string_with_partials(elasticapm_client, expected, http_t assert result["context"]["request"]["url"]["search"] == expected -def test_sanitize_credit_card(): - result = processors._sanitize("foo", "4242424242424242") - assert result == processors.MASK - - -def test_sanitize_credit_card_with_spaces(): - result = processors._sanitize("foo", "4242 4242 4242 4242") - assert result == processors.MASK - - def test_sanitize_dict(): result = processors._sanitize("foo", {1: 2}) assert result == {1: 2} From 92927a1794a8be0cbd70807d7cfeffe23fc446f5 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 8 Dec 2020 14:17:46 -0700 Subject: [PATCH 4/9] Move to MASK recommended by https://github.com/elastic/apm/pull/378 --- elasticapm/conf/constants.py | 2 +- tests/processors/tests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/elasticapm/conf/constants.py b/elasticapm/conf/constants.py index 101d136a8..c3d791fa7 100644 --- a/elasticapm/conf/constants.py +++ b/elasticapm/conf/constants.py @@ -46,7 +46,7 @@ HTTP_WITH_BODY = {"POST", "PUT", "PATCH", "DELETE"} -MASK = 8 * "*" +MASK = "[REDACTED]" EXCEPTION_CHAIN_MAX_DEPTH = 50 diff --git a/tests/processors/tests.py b/tests/processors/tests.py index 9e1d2cfe6..42c14ca0b 100644 --- a/tests/processors/tests.py +++ b/tests/processors/tests.py @@ -312,7 +312,7 @@ def test_sanitize_http_query_string_max_length(elasticapm_client): data = {"context": {"request": {"url": {"full": "http://example.com/bla?" + qs, "search": qs}}}} result = processors.sanitize_http_request_querystring(elasticapm_client, data) for val in (result["context"]["request"]["url"]["search"], result["context"]["request"]["url"]["full"]): - assert "api_key=********" in val + assert "api_key=[REDACTED]" in val assert len(val) == 1024 assert val.endswith(u"x…") From 7a03b22e7728dc9b47d7c08db7dc5bda524dcf1a Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 8 Dec 2020 14:19:53 -0700 Subject: [PATCH 5/9] Update changelog --- CHANGELOG.asciidoc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 198e49a57..f0f6f444f 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -40,7 +40,10 @@ endif::[] * Align `sanitize_field_names` config with the https://github.com/elastic/apm/blob/3fa78e2a1eeea81c73c2e16e96dbf6b2e79f3c64/specs/agents/sanitization.md[cross-agent spec]. - If you are using a non-default `sanitize_field_names`, surrounding each of your entries with stars (i.e. `\*secret\*`) will retain the old behavior. {pull}982[#982] + If you are using a non-default `sanitize_field_names`, surrounding each of your entries with stars (i.e. + `\*secret\*`) will retain the old behavior. {pull}982[#982] +* Remove credit card sanitization for field values. This improves performance, and the security value of this check was + dubious anyway. {pull}982[#982] [float] ===== Features From 88850a72a539324dfb6fe665c5ec0bcca39f2137 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 8 Dec 2020 15:18:33 -0700 Subject: [PATCH 6/9] Remove querystring sanitization --- CHANGELOG.asciidoc | 2 ++ docs/configuration.asciidoc | 1 - docs/sanitizing-data.asciidoc | 1 - elasticapm/conf/__init__.py | 1 - elasticapm/processors.py | 32 ++------------------- tests/processors/tests.py | 52 ++++------------------------------- 6 files changed, 11 insertions(+), 78 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index f0f6f444f..1adc8d032 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -44,6 +44,8 @@ endif::[] `\*secret\*`) will retain the old behavior. {pull}982[#982] * Remove credit card sanitization for field values. This improves performance, and the security value of this check was dubious anyway. {pull}982[#982] +* Remove HTTP querystring sanitization. This improves performance, and is meant to standardize behavior across the + agents, as defined in https://github.com/elastic/apm/pull/334. {pull}982[#982] [float] ===== Features diff --git a/docs/configuration.asciidoc b/docs/configuration.asciidoc index a9298e047..b9d2929aa 100644 --- a/docs/configuration.asciidoc +++ b/docs/configuration.asciidoc @@ -694,7 +694,6 @@ to avoid stampedes of instances that start at the same time. 'elasticapm.processors.sanitize_http_request_cookies', 'elasticapm.processors.sanitize_http_headers', 'elasticapm.processors.sanitize_http_wsgi_env', - 'elasticapm.processors.sanitize_http_request_querystring', 'elasticapm.processors.sanitize_http_request_body']` |============ diff --git a/docs/sanitizing-data.asciidoc b/docs/sanitizing-data.asciidoc index c12a80921..b1c7c8c4e 100644 --- a/docs/sanitizing-data.asciidoc +++ b/docs/sanitizing-data.asciidoc @@ -41,7 +41,6 @@ ELASTIC_APM = { 'elasticapm.processors.sanitize_http_request_cookies', 'elasticapm.processors.sanitize_http_headers', 'elasticapm.processors.sanitize_http_wsgi_env', - 'elasticapm.processors.sanitize_http_request_querystring', 'elasticapm.processors.sanitize_http_request_body', ), } diff --git a/elasticapm/conf/__init__.py b/elasticapm/conf/__init__.py index 2341d73f3..b887d1b3c 100644 --- a/elasticapm/conf/__init__.py +++ b/elasticapm/conf/__init__.py @@ -503,7 +503,6 @@ class Config(_ConfigBase): "elasticapm.processors.sanitize_http_response_cookies", "elasticapm.processors.sanitize_http_headers", "elasticapm.processors.sanitize_http_wsgi_env", - "elasticapm.processors.sanitize_http_request_querystring", "elasticapm.processors.sanitize_http_request_body", ], ) diff --git a/elasticapm/processors.py b/elasticapm/processors.py index c6d09cd07..88f0f97a4 100644 --- a/elasticapm/processors.py +++ b/elasticapm/processors.py @@ -34,7 +34,7 @@ from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES, ERROR, MASK, SPAN, TRANSACTION from elasticapm.utils import compat, varmap -from elasticapm.utils.encoding import force_text, keyword_field +from elasticapm.utils.encoding import force_text from elasticapm.utils.stacks import get_lines_from_file @@ -113,7 +113,7 @@ def sanitize_http_request_cookies(client, event): # sanitize request.header.cookie string try: - cookie_string = event["context"]["request"]["headers"]["cookie"] + cookie_string = force_text(event["context"]["request"]["headers"]["cookie"], errors="replace") event["context"]["request"]["headers"]["cookie"] = _sanitize_string( cookie_string, "; ", "=", sanitize_field_names=client.config.sanitize_field_names ) @@ -131,7 +131,7 @@ def sanitize_http_response_cookies(client, event): :return: The modified event """ try: - cookie_string = event["context"]["response"]["headers"]["set-cookie"] + cookie_string = force_text(event["context"]["response"]["headers"]["set-cookie"], errors="replace") event["context"]["response"]["headers"]["set-cookie"] = _sanitize_string( cookie_string, ";", "=", sanitize_field_names=client.config.sanitize_field_names ) @@ -187,32 +187,6 @@ def sanitize_http_wsgi_env(client, event): return event -@for_events(ERROR, TRANSACTION) -def sanitize_http_request_querystring(client, event): - """ - Sanitizes http request query string - :param client: an ElasticAPM client - :param event: a transaction or error event - :return: The modified event - """ - try: - query_string = force_text(event["context"]["request"]["url"]["search"], errors="replace") - except (KeyError, TypeError): - return event - if "=" in query_string: - sanitized_query_string = _sanitize_string( - query_string, "&", "=", sanitize_field_names=client.config.sanitize_field_names - ) - full_url = event["context"]["request"]["url"]["full"] - # we need to pipe the sanitized string through encoding.keyword_field to ensure that the maximum - # length of keyword fields is still ensured. - event["context"]["request"]["url"]["search"] = keyword_field(sanitized_query_string) - event["context"]["request"]["url"]["full"] = keyword_field( - full_url.replace(query_string, sanitized_query_string) - ) - return event - - @for_events(ERROR, TRANSACTION) def sanitize_http_request_body(client, event): """ diff --git a/tests/processors/tests.py b/tests/processors/tests.py index 42c14ca0b..a1ab583b3 100644 --- a/tests/processors/tests.py +++ b/tests/processors/tests.py @@ -57,6 +57,7 @@ def http_test_data(): "secret": "hello", "authorization": "bearer xyz", "some-header": "some-secret-value", + "cookie": "foo=bar; baz=foo", }, "cookies": { "foo": "bar", @@ -78,6 +79,7 @@ def http_test_data(): "secret": "hello", "authorization": "bearer xyz", "some-header": "some-secret-value", + "cookie": "foo=bar; baz=foo", }, }, } @@ -258,6 +260,7 @@ def test_sanitize_http_response_cookies(elasticapm_client, expected_cookies, htt def test_sanitize_http_headers(elasticapm_client, custom_header, http_test_data): result = processors.sanitize_http_headers(elasticapm_client, http_test_data) expected = { + "cookie": "foo=bar; baz=foo", "foo": "bar", "password": processors.MASK, "secret": processors.MASK, @@ -290,33 +293,6 @@ def test_sanitize_http_wgi_env(elasticapm_client, custom_env, http_test_data): assert result["context"]["request"]["env"] == expected -@pytest.mark.parametrize( - "elasticapm_client, expected", - [ - ( - {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, - "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field={0}".format(processors.MASK), - ), - ({}, "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field=123".format(processors.MASK)), - ], - indirect=["elasticapm_client"], -) -def test_sanitize_http_query_string(elasticapm_client, expected, http_test_data): - result = processors.sanitize_http_request_querystring(elasticapm_client, http_test_data) - assert result["context"]["request"]["url"]["search"] == expected - assert result["context"]["request"]["url"]["full"].endswith(expected) - - -def test_sanitize_http_query_string_max_length(elasticapm_client): - qs = "api_key=1&v=" + 1020 * "x" - data = {"context": {"request": {"url": {"full": "http://example.com/bla?" + qs, "search": qs}}}} - result = processors.sanitize_http_request_querystring(elasticapm_client, data) - for val in (result["context"]["request"]["url"]["search"], result["context"]["request"]["url"]["full"]): - assert "api_key=[REDACTED]" in val - assert len(val) == 1024 - assert val.endswith(u"x…") - - @pytest.mark.parametrize( "elasticapm_client, expected", [ @@ -333,22 +309,6 @@ def test_post_as_string(elasticapm_client, expected, http_test_data): assert result["context"]["request"]["body"] == expected -@pytest.mark.parametrize( - "elasticapm_client, expected", - [ - ( - {"sanitize_field_names": BASE_SANITIZE_FIELD_NAMES_UNPROCESSED + ["custom_field"]}, - "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field={0}".format(processors.MASK), - ), - ({}, "foo=bar&password={0}&secret={0}&cc=1234567890098765&custom_field=123".format(processors.MASK)), - ], - indirect=["elasticapm_client"], -) -def test_querystring_as_string_with_partials(elasticapm_client, expected, http_test_data): - result = processors.sanitize_http_request_querystring(elasticapm_client, http_test_data) - assert result["context"]["request"]["url"]["search"] == expected - - def test_sanitize_dict(): result = processors._sanitize("foo", {1: 2}) assert result == {1: 2} @@ -356,9 +316,9 @@ def test_sanitize_dict(): def test_non_utf8_encoding(elasticapm_client, http_test_data): broken = compat.b("broken=") + u"aéöüa".encode("latin-1") - http_test_data["context"]["request"]["url"]["search"] = broken - result = processors.sanitize_http_request_querystring(elasticapm_client, http_test_data) - assert result["context"]["request"]["url"]["search"] == u"broken=a\ufffd\ufffd\ufffda" + http_test_data["context"]["request"]["headers"]["cookie"] = broken + result = processors.sanitize_http_request_cookies(elasticapm_client, http_test_data) + assert result["context"]["request"]["headers"]["cookie"] == u"broken=a\ufffd\ufffd\ufffda" def test_remove_stacktrace_locals(): From 89cd6136daf20790d5d205e453015f46ec3477e3 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Tue, 8 Dec 2020 15:40:30 -0700 Subject: [PATCH 7/9] Update pre-commit to match upstream black behavior --- .pre-commit-config.yaml | 8 ++++---- tests/processors/tests.py | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2e797c5ae..65f216cf7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,15 @@ repos: - repo: https://github.com/pycqa/isort - rev: 5.5.4 + rev: 5.6.4 hooks: - id: isort - repo: https://github.com/ambv/black - rev: stable + rev: 20.8b1 hooks: - id: black language_version: python3 -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 hooks: - id: flake8 exclude: elasticapm\/utils\/wrapt|build|src|tests|dist|conftest.py|setup.py diff --git a/tests/processors/tests.py b/tests/processors/tests.py index a1ab583b3..b6440a021 100644 --- a/tests/processors/tests.py +++ b/tests/processors/tests.py @@ -50,7 +50,12 @@ def http_test_data(): "context": { "request": { "body": "foo=bar&password=123456&secret=abc&cc=1234567890098765&custom_field=123", - "env": {"foo": "bar", "password": "hello", "secret": "hello", "custom_env": "bye",}, + "env": { + "foo": "bar", + "password": "hello", + "secret": "hello", + "custom_env": "bye", + }, "headers": { "foo": "bar", "password": "hello", @@ -101,7 +106,14 @@ def test_stacktrace(elasticapm_client, custom_field): data = { "exception": { "stacktrace": [ - {"vars": {"foo": "bar", "password": "hello", "secret": "hello", "sensitive-stacktrace-val": "bye",}} + { + "vars": { + "foo": "bar", + "password": "hello", + "secret": "hello", + "sensitive-stacktrace-val": "bye", + } + } ], "cause": [ { From 95dcca1e69ad2408c94f54ebc39c9a81ac6a9313 Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 28 Jan 2021 11:18:32 -0700 Subject: [PATCH 8/9] Dynamically generate BASE_SANITIZE_FIELD_NAMES --- elasticapm/conf/constants.py | 41 ++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/elasticapm/conf/constants.py b/elasticapm/conf/constants.py index c3d791fa7..5a0533dd5 100644 --- a/elasticapm/conf/constants.py +++ b/elasticapm/conf/constants.py @@ -32,6 +32,31 @@ import re from collections import namedtuple + +def _starmatch_to_regex(pattern): + """ + This is a duplicate of starmatch_to_regex() in utils/__init__.py + + Duplication to avoid circular imports + """ + options = re.DOTALL + # check if we are case sensitive + if pattern.startswith("(?-i)"): + pattern = pattern[5:] + else: + options |= re.IGNORECASE + i, n = 0, len(pattern) + res = [] + while i < n: + c = pattern[i] + i = i + 1 + if c == "*": + res.append(".*") + else: + res.append(re.escape(c)) + return re.compile(r"(?:%s)\Z" % "".join(res), options) + + EVENTS_API_PATH = "intake/v2/events" AGENT_CONFIG_PATH = "config/v1/agents" @@ -59,20 +84,6 @@ HARDCODED_PROCESSORS = ["elasticapm.processors.add_context_lines_to_frames"] -BASE_SANITIZE_FIELD_NAMES = [ - re.compile("(?:password)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:passwd)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:pwd)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:secret)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:.*key)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:.*token.*)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:.*session.*)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:.*credit.*)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:.*card.*)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:authorization)\\Z", re.IGNORECASE | re.DOTALL), - re.compile("(?:set\\-cookie)\\Z", re.IGNORECASE | re.DOTALL), -] - BASE_SANITIZE_FIELD_NAMES_UNPROCESSED = [ "password", "passwd", @@ -87,6 +98,8 @@ "set-cookie", ] +BASE_SANITIZE_FIELD_NAMES = [_starmatch_to_regex(x) for x in BASE_SANITIZE_FIELD_NAMES_UNPROCESSED] + OUTCOME = namedtuple("OUTCOME", ["SUCCESS", "FAILURE", "UNKNOWN"])( SUCCESS="success", FAILURE="failure", UNKNOWN="unknown" ) From 690f70c030e10cd49b747cca03542db47a96749a Mon Sep 17 00:00:00 2001 From: Colton Myers Date: Thu, 28 Jan 2021 11:57:06 -0700 Subject: [PATCH 9/9] Fix the merge commit --- CHANGELOG.asciidoc | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 99259496f..d66125c76 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -30,10 +30,7 @@ endif::[] //===== Bug fixes // -<<<<<<< HEAD [float] -======= ->>>>>>> upstream/master === Unreleased // Unreleased changes go here