Skip to content

Commit d562fbe

Browse files
committed
added flag to disable capturing POST body, and default to false (#151)
closes #151
1 parent 6f029b3 commit d562fbe

File tree

12 files changed

+325
-59
lines changed

12 files changed

+325
-59
lines changed

CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ https://github.com/elastic/apm-agent-python/compare/v1.0.0\...master[Check the H
2424
* added options for configuring the amount of context lines that are captured with each frame ({pull}136[#136])
2525
* added support for tracing queries formatted as http://initd.org/psycopg/docs/sql.html[`psycopg2.sql.SQL`] objects ({pull}148[#148])
2626
* switched to `time.perf_counter` as timing function on Python 3 ({pull}138[#138])
27+
* added option to disable capturing of request body ({pull}151[#151])
2728
* BREAKING: Several settings and APIs have been renamed ({pull}111[#111], {pull}119[#119], {pull}143[#143]):
2829
** The decorator for custom instrumentation, `elasticapm.trace`, is now `elasticapm.capture_span`
2930
** The setting `traces_send_frequency` has been renamed to `flush_interval`.

docs/configuration.asciidoc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,29 @@ We differ between errors and spans, as well as library frames and app frames.
317317

318318
WARNING: Especially for spans, collecting source code can have a large impact on storage use in your Elasticsearch cluster.
319319

320+
[float]
321+
[[config-capture-body]]
322+
==== `capture_body`
323+
324+
|============
325+
| Environment | Django/Flask | Default
326+
| `ELASTIC_APM_CAPTURE_BODY` | `CAPTURE_BODY` | `off`
327+
|============
328+
329+
For transactions that are HTTP requests,
330+
the Python agent can optionally capture the request body (e.g. `POST` variables).
331+
332+
Possible values: `errors`, `transactions`, `all`, `off`.
333+
334+
If the request has a body and capturing is not enabled for the type of event being recorded (or the setting is `off`), the body will be shown as `[REDACTED]`.
335+
336+
For requests with a content type of `multipart/form-data`,
337+
any uploaded files will be referenced in a special `_files` key.
338+
It contains the name of the field, and the name of the uploaded file, if provided.
339+
340+
WARNING: Request bodies often contain sensitive values such as passwords, credit card numbers, etc.
341+
If your service handles data like this, we advise enabling this feature only with care.
342+
320343
[float]
321344
[[config-flush-interval]]
322345
==== `flush_interval`

elasticapm/conf/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ class Config(_ConfigBase):
166166
source_lines_span_library_frames = _ConfigValue('SOURCE_LINES_SPAN_LIBRARY_FRAMES', type=int, default=0)
167167
local_var_max_length = _ConfigValue('LOCAL_VAR_MAX_LENGTH', type=int, default=200)
168168
local_var_list_max_length = _ConfigValue('LOCAL_VAR_LIST_MAX_LENGTH', type=int, default=10)
169+
capture_body = _ConfigValue('CAPTURE_BODY', default='off')
169170
async_mode = _BoolConfigValue('ASYNC_MODE', default=True)
170171
instrument_django_middleware = _BoolConfigValue('INSTRUMENT_DJANGO_MIDDLEWARE', default=True)
171172
transactions_ignore_patterns = _ListConfigValue('TRANSACTIONS_IGNORE_PATTERNS', default=[])

elasticapm/contrib/django/client.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -94,27 +94,10 @@ def get_user_info(self, request):
9494

9595
return user_info
9696

97-
def get_data_from_request(self, request):
98-
if request.method != 'GET':
99-
try:
100-
if hasattr(request, 'body'):
101-
# Django 1.4+
102-
raw_data = request.body
103-
else:
104-
raw_data = request.raw_post_data
105-
data = raw_data if raw_data else request.POST
106-
except Exception:
107-
# assume we had a partial read:
108-
data = '<unavailable>'
109-
else:
110-
data = None
111-
112-
environ = request.META
113-
97+
def get_data_from_request(self, request, capture_body=False):
11498
result = {
115-
'body': data,
116-
'env': dict(get_environ(environ)),
117-
'headers': dict(get_headers(environ)),
99+
'env': dict(get_environ(request.META)),
100+
'headers': dict(get_headers(request.META)),
118101
'method': request.method,
119102
'socket': {
120103
'remote_address': request.META.get('REMOTE_ADDR'),
@@ -123,6 +106,22 @@ def get_data_from_request(self, request):
123106
'cookies': dict(request.COOKIES),
124107
}
125108

109+
if request.method not in ('GET', 'HEAD'):
110+
content_type = request.META.get('CONTENT_TYPE')
111+
if content_type == 'application/x-www-form-urlencoded':
112+
data = compat.multidict_to_dict(request.POST)
113+
elif content_type.startswith('multipart/form-data'):
114+
data = compat.multidict_to_dict(request.POST)
115+
if request.FILES:
116+
data['_files'] = {field: file.name for field, file in compat.iteritems(request.FILES)}
117+
else:
118+
try:
119+
data = request.body
120+
except Exception:
121+
data = '<unavailable>'
122+
123+
result['body'] = data if (capture_body or not data) else '[REDACTED]'
124+
126125
if hasattr(request, 'get_raw_uri'):
127126
# added in Django 1.9
128127
url = request.get_raw_uri()
@@ -158,7 +157,8 @@ def capture(self, event_type, request=None, **kwargs):
158157

159158
is_http_request = isinstance(request, HttpRequest)
160159
if is_http_request:
161-
context['request'] = self.get_data_from_request(request)
160+
context['request'] = self.get_data_from_request(request,
161+
capture_body=self.config.capture_body in ('all', 'errors'))
162162
context['user'] = self.get_user_info(request)
163163

164164
result = super(DjangoClient, self).capture(event_type, **kwargs)

elasticapm/contrib/django/middleware/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,10 @@ def process_response(self, request, response):
181181
transaction_name,
182182
request
183183
)
184-
request_data = lambda: self.client.get_data_from_request(request)
184+
request_data = lambda: self.client.get_data_from_request(
185+
request,
186+
capture_body=self.client.config.capture_body in ('all', 'transactions')
187+
)
185188
response_data = lambda: self.client.get_data_from_response(response)
186189
elasticapm.set_context(request_data, 'request')
187190
elasticapm.set_context(response_data, 'response')

elasticapm/contrib/flask/__init__.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,10 @@ def handle_exception(self, *args, **kwargs):
8989

9090
self.client.capture_exception(
9191
exc_info=kwargs.get('exc_info'),
92-
context={'request': get_data_from_request(request)},
92+
context={'request': get_data_from_request(
93+
request,
94+
capture_body=self.client.config.capture_body in ('errors', 'all')
95+
)},
9396
custom={
9497
'app': self.app,
9598
},
@@ -133,8 +136,11 @@ def request_started(self, app):
133136
def request_finished(self, app, response):
134137
rule = request.url_rule.rule if request.url_rule is not None else ""
135138
rule = build_name_with_http_method_prefix(rule, request)
136-
request_data = get_data_from_request(request)
137-
response_data = get_data_from_response(response)
139+
request_data = lambda: get_data_from_request(
140+
request,
141+
capture_body=self.client.config.capture_body in ('transactions', 'all')
142+
)
143+
response_data = lambda: get_data_from_response(response)
138144
elasticapm.set_context(request_data, 'request')
139145
elasticapm.set_context(response_data, 'response')
140146
if response.status_code:

elasticapm/contrib/flask/utils.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,11 @@
1-
try:
2-
from urllib.parse import urlencode
3-
except ImportError:
4-
# Python 2
5-
from urllib import urlencode
1+
from werkzeug.exceptions import ClientDisconnected
62

73
from elasticapm.utils import compat, get_url_dict
84
from elasticapm.utils.wsgi import get_environ, get_headers
95

106

11-
def get_data_from_request(request):
12-
body = None
13-
if request.data:
14-
body = request.data
15-
elif request.form:
16-
body = urlencode(request.form)
17-
7+
def get_data_from_request(request, capture_body=False):
188
result = {
19-
'body': body,
209
'env': dict(get_environ(request.environ)),
2110
'headers': dict(
2211
get_headers(request.environ),
@@ -28,9 +17,27 @@ def get_data_from_request(request):
2817
},
2918
'cookies': request.cookies,
3019
}
20+
if request.method not in ('GET', 'HEAD'):
21+
body = None
22+
if request.content_type == 'application/x-www-form-urlencoded':
23+
body = compat.multidict_to_dict(request.form)
24+
elif request.content_type.startswith('multipart/form-data'):
25+
body = compat.multidict_to_dict(request.form)
26+
if request.files:
27+
body['_files'] = {
28+
field: val[0].filename if len(val) == 1 else [f.filename for f in val]
29+
for field, val in compat.iterlists(request.files)
30+
}
31+
else:
32+
try:
33+
body = request.data
34+
except ClientDisconnected:
35+
pass
36+
37+
if body is not None:
38+
result['body'] = body if capture_body else '[REDACTED]'
3139

3240
result['url'] = get_url_dict(request.url)
33-
3441
return result
3542

3643

elasticapm/utils/compat.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ def iterkeys(d, **kwargs):
6767

6868
def iteritems(d, **kwargs):
6969
return d.iteritems(**kwargs)
70+
71+
# for django.utils.datastructures.MultiValueDict
72+
def iterlists(d, **kw):
73+
return d.iterlists(**kw)
74+
7075
else:
7176
import io
7277
import queue # noqa F401
@@ -93,13 +98,15 @@ def iterkeys(d, **kwargs):
9398
def iteritems(d, **kwargs):
9499
return iter(d.items(**kwargs))
95100

101+
# for django.utils.datastructures.MultiValueDict
102+
def iterlists(d, **kw):
103+
return iter(d.lists(**kw))
104+
96105

97106
def get_default_library_patters():
98107
"""
99108
Returns library paths depending on the used platform.
100109
101-
TODO: ensure that this works correctly on Windows
102-
103110
:return: a list of glob paths
104111
"""
105112
python_version = platform.python_version_tuple()
@@ -114,3 +121,16 @@ def get_default_library_patters():
114121
if system == 'Windows':
115122
return [r'*\lib\*']
116123
return ['*/lib/python%s.%s/*' % python_version[:2], '*/lib64/python%s.%s/*' % python_version[:2]]
124+
125+
126+
def multidict_to_dict(d):
    """
    Turns a werkzeug.MultiDict or django.MultiValueDict into a plain dict.

    A key that carries exactly one value is mapped to that value directly;
    a key with any other number of values is mapped to the list of its
    values.

    :param d: a MultiDict or MultiValueDict instance
    :return: a dict instance
    """
    return {
        key: values[0] if len(values) == 1 else values
        for key, values in iterlists(d)
    }

0 commit comments

Comments
 (0)