Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
repos:
- repo: https://github.com/pycqa/isort
rev: 5.5.4
rev: 5.6.4
hooks:
- id: isort
- repo: https://github.com/ambv/black
rev: stable
rev: 20.8b1
hooks:
- id: black
language_version: python3
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
hooks:
- id: flake8
exclude: elasticapm\/utils\/wrapt|build|src|tests|dist|conftest.py|setup.py
Expand Down
26 changes: 25 additions & 1 deletion CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,30 @@ endif::[]
//===== Bug fixes
//

[float]
=== Unreleased

// Unreleased changes go here
// When the next release happens, nest these changes under the "Python Agent version 5.x" heading
[float]
===== Breaking changes

* Align `sanitize_field_names` config with the
https://github.com/elastic/apm/blob/3fa78e2a1eeea81c73c2e16e96dbf6b2e79f3c64/specs/agents/sanitization.md[cross-agent spec].
If you are using a non-default `sanitize_field_names`, surrounding each of your entries with stars (e.g.
`\*secret\*`) will retain the old behavior. {pull}982[#982]
* Remove credit card sanitization for field values. This improves performance, and the security value of this check was
dubious anyway. {pull}982[#982]
* Remove HTTP querystring sanitization. This improves performance, and is meant to standardize behavior across the
agents, as defined in https://github.com/elastic/apm/pull/334. {pull}982[#982]

[float]
===== Features

[float]
===== Bug fixes


[[release-notes-5.x]]
=== Python Agent version 5.x

Expand All @@ -45,7 +69,7 @@ endif::[]
* Implement `transaction_ignore_urls` config (supports central config) {pull}923[#923]
* Add public API to retrieve trace parent header {pull}956[#956]
* Added support for cgroup memory metrics {pull}846[#846]


[float]
===== Bug fixes
Expand Down
31 changes: 18 additions & 13 deletions docs/configuration.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,6 @@ to avoid stampedes of instances that start at the same time.
'elasticapm.processors.sanitize_http_request_cookies',
'elasticapm.processors.sanitize_http_headers',
'elasticapm.processors.sanitize_http_wsgi_env',
'elasticapm.processors.sanitize_http_request_querystring',
'elasticapm.processors.sanitize_http_request_body']`
|============

Expand All @@ -707,23 +706,29 @@ WARNING: We recommend always including the default set of validators if you cust
[[config-sanitize-field-names]]
==== `sanitize_field_names`

<<dynamic-configuration, image:./images/dynamic-config.svg[] >>

[options="header"]
|============
| Environment | Django/Flask | Default
| `ELASTIC_APM_SANITIZE_FIELD_NAMES` | `SANITIZE_FIELD_NAMES` | `['authorization',
'password',
'secret',
'passwd',
'token',
'api_key',
'access_token',
'sessionid']`
|============

A list of field names to mask when using processors.
| `ELASTIC_APM_SANITIZE_FIELD_NAMES` | `SANITIZE_FIELD_NAMES` | `["password",
"passwd",
"pwd",
"secret",
"*key",
"*token*",
"*session*",
"*credit*",
"*card*",
"authorization",
"set-cookie"]`
|============

A list of glob-style patterns that are matched against field names; the values of matching fields are masked when using processors.
For more information, see <<sanitizing-data, Sanitizing Data>>.

WARNING: We recommend always including the default set of field names if you customize this setting.
WARNING: We recommend always including the default set of field name matches
if you customize this setting.


[float]
Expand Down
1 change: 0 additions & 1 deletion docs/sanitizing-data.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ ELASTIC_APM = {
'elasticapm.processors.sanitize_http_request_cookies',
'elasticapm.processors.sanitize_http_headers',
'elasticapm.processors.sanitize_http_wsgi_env',
'elasticapm.processors.sanitize_http_request_querystring',
'elasticapm.processors.sanitize_http_request_body',
),
}
Expand Down
5 changes: 3 additions & 2 deletions elasticapm/conf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,11 +503,12 @@ class Config(_ConfigBase):
"elasticapm.processors.sanitize_http_response_cookies",
"elasticapm.processors.sanitize_http_headers",
"elasticapm.processors.sanitize_http_wsgi_env",
"elasticapm.processors.sanitize_http_request_querystring",
"elasticapm.processors.sanitize_http_request_body",
],
)
sanitize_field_names = _ListConfigValue("SANITIZE_FIELD_NAMES", default=BASE_SANITIZE_FIELD_NAMES)
sanitize_field_names = _ListConfigValue(
"SANITIZE_FIELD_NAMES", type=starmatch_to_regex, default=BASE_SANITIZE_FIELD_NAMES
)
metrics_sets = _ListConfigValue(
"METRICS_SETS",
default=[
Expand Down
31 changes: 24 additions & 7 deletions elasticapm/conf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@

HTTP_WITH_BODY = {"POST", "PUT", "PATCH", "DELETE"}

MASK = 8 * "*"
MASK = "[REDACTED]"

EXCEPTION_CHAIN_MAX_DEPTH = 50

Expand All @@ -60,14 +60,31 @@
HARDCODED_PROCESSORS = ["elasticapm.processors.add_context_lines_to_frames"]

BASE_SANITIZE_FIELD_NAMES = [
"authorization",
re.compile("(?:password)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:passwd)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:pwd)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:secret)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:.*key)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:.*token.*)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:.*session.*)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:.*credit.*)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:.*card.*)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:authorization)\\Z", re.IGNORECASE | re.DOTALL),
re.compile("(?:set\\-cookie)\\Z", re.IGNORECASE | re.DOTALL),
]

BASE_SANITIZE_FIELD_NAMES_UNPROCESSED = [
"password",
"secret",
"passwd",
"token",
"api_key",
"access_token",
"sessionid",
"pwd",
"secret",
"*key",
"*token*",
"*session*",
"*credit*",
"*card*",
"authorization",
"set-cookie",
]

OUTCOME = namedtuple("OUTCOME", ["SUCCESS", "FAILURE", "UNKNOWN"])(
Expand Down
44 changes: 6 additions & 38 deletions elasticapm/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,14 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE


import re
import warnings
from collections import defaultdict

from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES, ERROR, MASK, SPAN, TRANSACTION
from elasticapm.utils import compat, varmap
from elasticapm.utils.encoding import force_text, keyword_field
from elasticapm.utils.encoding import force_text
from elasticapm.utils.stacks import get_lines_from_file

SANITIZE_VALUE_PATTERNS = [re.compile(r"^[- \d]{16,19}$")] # credit card numbers, with or without spacers


def for_events(*events):
"""
Expand Down Expand Up @@ -116,7 +113,7 @@ def sanitize_http_request_cookies(client, event):

# sanitize request.header.cookie string
try:
cookie_string = event["context"]["request"]["headers"]["cookie"]
cookie_string = force_text(event["context"]["request"]["headers"]["cookie"], errors="replace")
event["context"]["request"]["headers"]["cookie"] = _sanitize_string(
cookie_string, "; ", "=", sanitize_field_names=client.config.sanitize_field_names
)
Expand All @@ -134,7 +131,7 @@ def sanitize_http_response_cookies(client, event):
:return: The modified event
"""
try:
cookie_string = event["context"]["response"]["headers"]["set-cookie"]
cookie_string = force_text(event["context"]["response"]["headers"]["set-cookie"], errors="replace")
event["context"]["response"]["headers"]["set-cookie"] = _sanitize_string(
cookie_string, ";", "=", sanitize_field_names=client.config.sanitize_field_names
)
Expand Down Expand Up @@ -190,32 +187,6 @@ def sanitize_http_wsgi_env(client, event):
return event


@for_events(ERROR, TRANSACTION)
def sanitize_http_request_querystring(client, event):
"""
Sanitizes http request query string
:param client: an ElasticAPM client
:param event: a transaction or error event
:return: The modified event
"""
try:
query_string = force_text(event["context"]["request"]["url"]["search"], errors="replace")
except (KeyError, TypeError):
return event
if "=" in query_string:
sanitized_query_string = _sanitize_string(
query_string, "&", "=", sanitize_field_names=client.config.sanitize_field_names
)
full_url = event["context"]["request"]["url"]["full"]
# we need to pipe the sanitized string through encoding.keyword_field to ensure that the maximum
# length of keyword fields is still ensured.
event["context"]["request"]["url"]["search"] = keyword_field(sanitized_query_string)
event["context"]["request"]["url"]["full"] = keyword_field(
full_url.replace(query_string, sanitized_query_string)
)
return event


@for_events(ERROR, TRANSACTION)
def sanitize_http_request_body(client, event):
"""
Expand Down Expand Up @@ -276,16 +247,13 @@ def mark_in_app_frames(client, event):

def _sanitize(key, value, **kwargs):
if "sanitize_field_names" in kwargs:
sanitize_field_names = frozenset(kwargs["sanitize_field_names"])
sanitize_field_names = kwargs["sanitize_field_names"]
else:
sanitize_field_names = frozenset(BASE_SANITIZE_FIELD_NAMES)
sanitize_field_names = BASE_SANITIZE_FIELD_NAMES

if value is None:
return

if isinstance(value, compat.string_types) and any(pattern.match(value) for pattern in SANITIZE_VALUE_PATTERNS):
return MASK

if isinstance(value, dict):
# varmap will call _sanitize on each k:v pair of the dict, so we don't
# have to do anything with dicts here
Expand All @@ -296,7 +264,7 @@ def _sanitize(key, value, **kwargs):

key = key.lower()
for field in sanitize_field_names:
if field in key:
if field.match(key.strip()):
# store mask as a fixed length for security
return MASK
return value
Expand Down
Loading