Skip to content

Commit 7e93862

Browse files
AumitLeonbasepi
andauthored
Config sanitize field names (#860)
* feat: configurable SANITIZE_FIELD_NAMES * tests: tests for configurable SANITIZE_FIELD_NAMES * refactor: wrap a class around field name operations * chore: fixes * Send client, not transport, to processors * fix: update sanitize_stacktrace_locals to use config fields * Cleanup processors Co-authored-by: Colton Myers <[email protected]> * fix: pass sanitize_field_names to _sanitize_string * chore: cleanup, tests, and doc updates * chore: reorder docs statements Co-authored-by: Colton Myers <[email protected]>
1 parent 18ebe4c commit 7e93862

File tree

9 files changed

+263
-78
lines changed

9 files changed

+263
-78
lines changed

docs/configuration.asciidoc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,28 @@ For more information, see <<sanitizing-data, Sanitizing Data>>.
636636

637637
WARNING: We recommend always including the default set of validators if you customize this setting.
638638

639+
[float]
640+
[[config-sanitize-field-names]]
641+
==== `sanitize_field_names`
642+
643+
[options="header"]
644+
|============
645+
| Environment | Django/Flask | Default
646+
| `ELASTIC_APM_SANITIZE_FIELD_NAMES` | `SANITIZE_FIELD_NAMES` | `['authorization',
647+
'password',
648+
'secret',
649+
'passwd',
650+
'token',
651+
'api_key',
652+
'access_token',
653+
'sessionid']`
654+
|============
655+
656+
A list of field names to mask when using processors.
657+
For more information, see <<sanitizing-data, Sanitizing Data>>.
658+
659+
WARNING: We recommend always including the default set of field names if you customize this setting.
660+
639661

640662
[float]
641663
[[config-transaction-sample-rate]]

docs/sanitizing-data.asciidoc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,28 @@ ELASTIC_APM = {
4848
----
4949

5050
NOTE: We recommend using the above list of processors, which sanitize passwords and secrets in different places of the event object.
51+
52+
The default processors sanitize fields based on a set of default field names defined in `elasticapm.conf.constants`. This set can be configured with the `SANITIZE_FIELD_NAMES` configuration option. For example, if your application produces a sensitive field called `My-Sensitive-Field`, the default processors can be used to automatically sanitize this field. You can specify which fields to sanitize within the default processors like this:
53+
54+
[source,python]
55+
----
56+
ELASTIC_APM = {
57+
'APP_NAME': '<APP-NAME>',
58+
'SECRET_TOKEN': '<SECRET-TOKEN>',
59+
'SANITIZE_FIELD_NAMES': (
60+
'My-Sensitive-Field',
61+
'authorization',
62+
'password',
63+
'secret',
64+
'passwd',
65+
'token',
66+
'api_key',
67+
'access_token',
68+
'sessionid',
69+
),
70+
}
71+
----
72+
73+
NOTE: We recommend keeping the above list of default fields, in addition to your own fields, so that the various parts of the event object are still sanitized.
74+
75+
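When choosing field names to sanitize, keep in mind that matching is done by substring rather than by exact name. For example, passing `base` as a field name to be sanitized will also sanitize all fields whose names contain `base`, that is, all names matching the wildcard pattern `\*base*`. Keys are lowercased before they are compared, so specify field names in lowercase (for example, `my-sensitive-field`); a configured name containing uppercase characters will not match.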

elasticapm/conf/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import socket
3636
import threading
3737

38+
from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES
3839
from elasticapm.utils import compat, starmatch_to_regex
3940
from elasticapm.utils.logging import get_logger
4041
from elasticapm.utils.threading import IntervalTimer, ThreadManager
@@ -286,6 +287,7 @@ class Config(_ConfigBase):
286287
"elasticapm.processors.sanitize_http_request_body",
287288
],
288289
)
290+
sanitize_field_names = _ListConfigValue("SANITIZE_FIELD_NAMES", default=BASE_SANITIZE_FIELD_NAMES)
289291
metrics_sets = _ListConfigValue(
290292
"METRICS_SETS",
291293
default=[

elasticapm/conf/constants.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@
5858

5959
HARDCODED_PROCESSORS = ["elasticapm.processors.add_context_lines_to_frames"]
6060

61+
BASE_SANITIZE_FIELD_NAMES = [
62+
"authorization",
63+
"password",
64+
"secret",
65+
"passwd",
66+
"token",
67+
"api_key",
68+
"access_token",
69+
"sessionid",
70+
]
71+
6172
try:
6273
# Python 2
6374
LABEL_TYPES = (bool, int, long, float, decimal.Decimal)

elasticapm/processors.py

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,22 +33,17 @@
3333
import warnings
3434
from collections import defaultdict
3535

36-
from elasticapm.conf.constants import ERROR, MASK, SPAN, TRANSACTION
36+
from elasticapm.conf.constants import BASE_SANITIZE_FIELD_NAMES, ERROR, MASK, SPAN, TRANSACTION
3737
from elasticapm.utils import compat, varmap
3838
from elasticapm.utils.encoding import force_text
3939
from elasticapm.utils.stacks import get_lines_from_file
4040

41-
SANITIZE_FIELD_NAMES = frozenset(
42-
["authorization", "password", "secret", "passwd", "token", "api_key", "access_token", "sessionid"]
43-
)
44-
4541
SANITIZE_VALUE_PATTERNS = [re.compile(r"^[- \d]{16,19}$")] # credit card numbers, with or without spacers
4642

4743

4844
def for_events(*events):
4945
"""
5046
:param events: list of event types
51-
5247
Only calls wrapped function if given event_type is in list of events
5348
"""
5449
events = set(events)
@@ -64,7 +59,6 @@ def wrap(func):
6459
def remove_http_request_body(client, event):
6560
"""
6661
Removes request.body from context
67-
6862
:param client: an ElasticAPM client
6963
:param event: a transaction or error event
7064
:return: The modified event
@@ -78,7 +72,6 @@ def remove_http_request_body(client, event):
7872
def remove_stacktrace_locals(client, event):
7973
"""
8074
Removes local variables from any frames.
81-
8275
:param client: an ElasticAPM client
8376
:param event: a transaction or error event
8477
:return: The modified event
@@ -91,15 +84,14 @@ def remove_stacktrace_locals(client, event):
9184
def sanitize_stacktrace_locals(client, event):
9285
"""
9386
Sanitizes local variables in all frames
94-
9587
:param client: an ElasticAPM client
9688
:param event: a transaction or error event
9789
:return: The modified event
9890
"""
9991

10092
def func(frame):
10193
if "vars" in frame:
102-
frame["vars"] = varmap(_sanitize, frame["vars"])
94+
frame["vars"] = varmap(_sanitize, frame["vars"], sanitize_field_names=client.config.sanitize_field_names)
10395

10496
return _process_stack_frames(event, func)
10597

@@ -108,7 +100,6 @@ def func(frame):
108100
def sanitize_http_request_cookies(client, event):
109101
"""
110102
Sanitizes http request cookies
111-
112103
:param client: an ElasticAPM client
113104
:param event: a transaction or error event
114105
:return: The modified event
@@ -117,14 +108,18 @@ def sanitize_http_request_cookies(client, event):
117108
# sanitize request.cookies dict
118109
try:
119110
cookies = event["context"]["request"]["cookies"]
120-
event["context"]["request"]["cookies"] = varmap(_sanitize, cookies)
111+
event["context"]["request"]["cookies"] = varmap(
112+
_sanitize, cookies, sanitize_field_names=client.config.sanitize_field_names
113+
)
121114
except (KeyError, TypeError):
122115
pass
123116

124117
# sanitize request.header.cookie string
125118
try:
126119
cookie_string = event["context"]["request"]["headers"]["cookie"]
127-
event["context"]["request"]["headers"]["cookie"] = _sanitize_string(cookie_string, "; ", "=")
120+
event["context"]["request"]["headers"]["cookie"] = _sanitize_string(
121+
cookie_string, "; ", "=", sanitize_field_names=client.config.sanitize_field_names
122+
)
128123
except (KeyError, TypeError):
129124
pass
130125
return event
@@ -140,7 +135,9 @@ def sanitize_http_response_cookies(client, event):
140135
"""
141136
try:
142137
cookie_string = event["context"]["response"]["headers"]["set-cookie"]
143-
event["context"]["response"]["headers"]["set-cookie"] = _sanitize_string(cookie_string, ";", "=")
138+
event["context"]["response"]["headers"]["set-cookie"] = _sanitize_string(
139+
cookie_string, ";", "=", sanitize_field_names=client.config.sanitize_field_names
140+
)
144141
except (KeyError, TypeError):
145142
pass
146143
return event
@@ -150,22 +147,25 @@ def sanitize_http_response_cookies(client, event):
150147
def sanitize_http_headers(client, event):
151148
"""
152149
Sanitizes http request/response headers
153-
154150
:param client: an ElasticAPM client
155151
:param event: a transaction or error event
156152
:return: The modified event
157153
"""
158154
# request headers
159155
try:
160156
headers = event["context"]["request"]["headers"]
161-
event["context"]["request"]["headers"] = varmap(_sanitize, headers)
157+
event["context"]["request"]["headers"] = varmap(
158+
_sanitize, headers, sanitize_field_names=client.config.sanitize_field_names
159+
)
162160
except (KeyError, TypeError):
163161
pass
164162

165163
# response headers
166164
try:
167165
headers = event["context"]["response"]["headers"]
168-
event["context"]["response"]["headers"] = varmap(_sanitize, headers)
166+
event["context"]["response"]["headers"] = varmap(
167+
_sanitize, headers, sanitize_field_names=client.config.sanitize_field_names
168+
)
169169
except (KeyError, TypeError):
170170
pass
171171

@@ -176,14 +176,15 @@ def sanitize_http_headers(client, event):
176176
def sanitize_http_wsgi_env(client, event):
177177
"""
178178
Sanitizes WSGI environment variables
179-
180179
:param client: an ElasticAPM client
181180
:param event: a transaction or error event
182181
:return: The modified event
183182
"""
184183
try:
185184
env = event["context"]["request"]["env"]
186-
event["context"]["request"]["env"] = varmap(_sanitize, env)
185+
event["context"]["request"]["env"] = varmap(
186+
_sanitize, env, sanitize_field_names=client.config.sanitize_field_names
187+
)
187188
except (KeyError, TypeError):
188189
pass
189190
return event
@@ -193,7 +194,6 @@ def sanitize_http_wsgi_env(client, event):
193194
def sanitize_http_request_querystring(client, event):
194195
"""
195196
Sanitizes http request query string
196-
197197
:param client: an ElasticAPM client
198198
:param event: a transaction or error event
199199
:return: The modified event
@@ -203,7 +203,9 @@ def sanitize_http_request_querystring(client, event):
203203
except (KeyError, TypeError):
204204
return event
205205
if "=" in query_string:
206-
sanitized_query_string = _sanitize_string(query_string, "&", "=")
206+
sanitized_query_string = _sanitize_string(
207+
query_string, "&", "=", sanitize_field_names=client.config.sanitize_field_names
208+
)
207209
full_url = event["context"]["request"]["url"]["full"]
208210
event["context"]["request"]["url"]["search"] = sanitized_query_string
209211
event["context"]["request"]["url"]["full"] = full_url.replace(query_string, sanitized_query_string)
@@ -216,7 +218,6 @@ def sanitize_http_request_body(client, event):
216218
Sanitizes http request body. This only works if the request body
217219
is a query-encoded string. Other types (e.g. JSON) are not handled by
218220
this sanitizer.
219-
220221
:param client: an ElasticAPM client
221222
:param event: a transaction or error event
222223
:return: The modified event
@@ -226,7 +227,9 @@ def sanitize_http_request_body(client, event):
226227
except (KeyError, TypeError):
227228
return event
228229
if "=" in body:
229-
sanitized_query_string = _sanitize_string(body, "&", "=")
230+
sanitized_query_string = _sanitize_string(
231+
body, "&", "=", sanitize_field_names=client.config.sanitize_field_names
232+
)
230233
event["context"]["request"]["body"] = sanitized_query_string
231234
return event
232235

@@ -267,7 +270,12 @@ def mark_in_app_frames(client, event):
267270
return event
268271

269272

270-
def _sanitize(key, value):
273+
def _sanitize(key, value, **kwargs):
274+
if "sanitize_field_names" in kwargs:
275+
sanitize_field_names = frozenset(kwargs["sanitize_field_names"])
276+
else:
277+
sanitize_field_names = frozenset(BASE_SANITIZE_FIELD_NAMES)
278+
271279
if value is None:
272280
return
273281

@@ -283,27 +291,28 @@ def _sanitize(key, value):
283291
return value
284292

285293
key = key.lower()
286-
for field in SANITIZE_FIELD_NAMES:
294+
for field in sanitize_field_names:
287295
if field in key:
288296
# store mask as a fixed length for security
289297
return MASK
290298
return value
291299

292300

293-
def _sanitize_string(unsanitized, itemsep, kvsep):
301+
def _sanitize_string(unsanitized, itemsep, kvsep, sanitize_field_names=BASE_SANITIZE_FIELD_NAMES):
294302
"""
295303
sanitizes a string that contains multiple key/value items
296304
:param unsanitized: the unsanitized string
297305
:param itemsep: string that separates items
298306
:param kvsep: string that separates key from value
307+
:param sanitize_field_names: field names to pass to _sanitize
299308
:return: a sanitized string
300309
"""
301310
sanitized = []
302311
kvs = unsanitized.split(itemsep)
303312
for kv in kvs:
304313
kv = kv.split(kvsep)
305314
if len(kv) == 2:
306-
sanitized.append((kv[0], _sanitize(kv[0], kv[1])))
315+
sanitized.append((kv[0], _sanitize(kv[0], kv[1], sanitize_field_names=sanitize_field_names)))
307316
else:
308317
sanitized.append(kv)
309318
return itemsep.join(kvsep.join(kv) for kv in sanitized)

elasticapm/transport/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def _process_event(self, event_type, data):
176176
# Run the data through processors
177177
for processor in self._processors:
178178
if not hasattr(processor, "event_types") or event_type in processor.event_types:
179-
data = processor(self, data)
179+
data = processor(self.client, data)
180180
if not data:
181181
logger.debug(
182182
"Dropped event of type %s due to processor %s.%s",

elasticapm/utils/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
default_ports = {"https": 443, "http": 80, "postgresql": 5432, "mysql": 3306, "mssql": 1433}
4949

5050

51-
def varmap(func, var, context=None, name=None):
51+
def varmap(func, var, context=None, name=None, **kwargs):
5252
"""
5353
Executes ``func(key_name, value)`` on all values,
5454
recursively discovering dict and list scoped
@@ -58,14 +58,14 @@ def varmap(func, var, context=None, name=None):
5858
context = set()
5959
objid = id(var)
6060
if objid in context:
61-
return func(name, "<...>")
61+
return func(name, "<...>", **kwargs)
6262
context.add(objid)
6363
if isinstance(var, dict):
64-
ret = func(name, dict((k, varmap(func, v, context, k)) for k, v in compat.iteritems(var)))
64+
ret = func(name, dict((k, varmap(func, v, context, k, **kwargs)) for k, v in compat.iteritems(var)), **kwargs)
6565
elif isinstance(var, (list, tuple)):
66-
ret = func(name, [varmap(func, f, context, name) for f in var])
66+
ret = func(name, [varmap(func, f, context, name, **kwargs) for f in var], **kwargs)
6767
else:
68-
ret = func(name, var)
68+
ret = func(name, var, **kwargs)
6969
context.remove(objid)
7070
return ret
7171

elasticapm/utils/encoding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def to_string(value):
179179
return to_unicode(value).encode("utf-8")
180180

181181

182-
def shorten(var, list_length=50, string_length=200, dict_length=50):
182+
def shorten(var, list_length=50, string_length=200, dict_length=50, **kwargs):
183183
"""
184184
Shorten a given variable based on configurable maximum lengths, leaving
185185
breadcrumbs in the object to show that it was shortened.

0 commit comments

Comments
 (0)