Skip to content

Commit 58ef57f

Browse files
Adding custom sentry sanitizer
1 parent 26bf209 commit 58ef57f

File tree

3 files changed

+149
-1
lines changed

3 files changed

+149
-1
lines changed

tests/server/test_sanitize.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import pytest
2+
from unittest import mock
3+
4+
from waterbutler.server.sanitize import WBSanitizer
5+
6+
7+
@pytest.fixture
def sanitizer():
    """Provide a fresh ``WBSanitizer`` for each test.

    The sanitizer is constructed with a ``mock.Mock()`` standing in for its
    single constructor argument, so no real Sentry setup is required.
    """
    stand_in = mock.Mock()
    instance = WBSanitizer(stand_in)
    return instance
10+
11+
12+
class TestWBSanitizer:
    """Behavioral tests for ``WBSanitizer.sanitize``."""

    # The sanitize function changes some strings and dictionaries
    # you put into it, so you need to explicitly test most things

    MASK = '*' * 8

    def test_no_sanitization(self, sanitizer):
        """A harmless key/value pair is returned untouched."""
        assert sanitizer.sanitize('thing', 'ghost science') == 'ghost science'

    def test_fields_sanitized(self, sanitizer):
        """Every name listed in ``FIELDS`` causes its value to be masked."""
        for field in sanitizer.FIELDS:
            assert sanitizer.sanitize(field, 'free speech') == self.MASK

    def test_value_is_none(self, sanitizer):
        """``None`` values are passed through as ``None``."""
        assert sanitizer.sanitize('great hair', None) is None

    def test_sanitize_credit_card(self, sanitizer):
        """A 15-digit string is masked; a 22-digit string is not."""
        assert sanitizer.sanitize('credit', '424242424242424') == self.MASK
        assert sanitizer.sanitize('credit', '4242424242424243333333') != self.MASK

    def test_sanitize_dictionary(self, sanitizer):
        """Dictionary entries stored under sensitive keys are masked."""
        value_dict = {
            'great_entry': 'very much not a secret or credit card',
        }

        result = sanitizer.sanitize('value_dict', value_dict)
        assert result == {
            'great_entry': 'very much not a secret or credit card',
        }

        sanitize_dict = {
            'key': 'secret',
            'okay_value': 'bears are awesome',
        }
        result = sanitizer.sanitize('sanitize_dict', sanitize_dict)

        # Sanity check
        assert result != {
            'key': 'secret',
            'okay_value': 'bears are awesome',
        }

        assert result == {
            'key': self.MASK,
            'okay_value': 'bears are awesome',
        }

    def test_dataverse_secret(self, sanitizer):
        """Dataverse-style secrets are masked wherever they occur in a string."""
        # Named oddly because if you call it `dv_secret` it will get sanitized by a different
        # part of the sanitizer
        dv_value = 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        assert sanitizer.sanitize('dv_value', dv_value) == self.MASK

        dv_value = 'random characters and other things aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
        expected = 'random characters and other things ' + self.MASK
        assert sanitizer.sanitize('dv_value', dv_value) == expected

    def test_bytes(self, sanitizer):
        """Keys given as ``bytes`` are checked the same way as text keys."""
        key = b'key'
        assert sanitizer.sanitize(key, 'bossy yogurt') == self.MASK

        other_key = b'should_be_safe'
        assert sanitizer.sanitize(other_key, 'snow science') == 'snow science'

waterbutler/server/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ def make_app(debug):
4545
[(r'/status', handlers.StatusHandler)],
4646
debug=debug,
4747
)
48-
app.sentry_client = AsyncSentryClient(settings.SENTRY_DSN, release=waterbutler.__version__)
48+
app.sentry_client = AsyncSentryClient(settings.SENTRY_DSN, release=waterbutler.__version__,
49+
processors=('waterbutler.server.sanitize.WBSanitizer',))
4950
return app
5051

5152

waterbutler/server/sanitize.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import re
2+
3+
from raven.processors import SanitizePasswordsProcessor
4+
5+
6+
class WBSanitizer(SanitizePasswordsProcessor):
    """Asterisk out things that look like passwords, keys, etc.

    Masking is applied in three situations:

    * the value is stored under a key whose name contains one of ``FIELDS``,
    * a string value looks like a credit card number (``VALUES_RE``),
    * a string value contains a Dataverse-style secret
      (``DATAVERSE_SECRET_RE``); only the secret itself is replaced.
    """

    # Store mask as a fixed length for security
    MASK = '*' * 8

    # Token and key added from original. Key is used by Dataverse
    FIELDS = frozenset([
        'password',
        'secret',
        'passwd',
        'authorization',
        'api_key',
        'apikey',
        'sentry_dsn',
        'access_token',
        'key',
        'token',
    ])

    # Credit card regex left intact from original processor
    # While we should never have credit card information, it's still best to perform the check
    # and keep old functionality
    VALUES_RE = re.compile(r'^(?:\d[ -]*?){13,16}$')

    # Should specifically match Dataverse secrets. Key format checked on demo and on Harvard
    DATAVERSE_SECRET_RE = re.compile(r'[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]'
                                     r'{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}')

    def sanitize(self, key, value):
        """Overload the sanitize function of the `SanitizePasswordsProcessor`.

        Args:
            key: Name the value is stored under. May be ``None``, ``str``,
                ``bytes`` or any object convertible with ``str()``.
            value: The value to inspect.

        Returns:
            ``None`` when ``value`` is ``None``; ``MASK`` when ``key`` is
            sensitive or ``value`` looks like a credit card number; otherwise
            ``value`` with embedded Dataverse secrets masked. For a ``dict``
            a sanitized shallow copy is returned and the caller's dictionary
            is left unmodified. Only the first level of a dictionary is
            inspected; nested containers are returned as-is.
        """
        if value is None:
            return None

        if isinstance(value, str):
            # Part of the original method: card-like strings are masked entirely,
            # Dataverse secrets are masked in place.
            value = self._scrub_text(value)
            if value == self.MASK:
                return self.MASK
        elif isinstance(value, dict):
            # Work on a copy so the data handed to us is never altered.
            sanitized = value.copy()
            for item, entry in value.items():
                if self._is_sensitive_key(item):
                    sanitized[item] = self.MASK
                elif isinstance(entry, str):
                    sanitized[item] = self._scrub_text(entry)
            value = sanitized

        if self._is_sensitive_key(key):
            return self.MASK

        return value

    def _scrub_text(self, text):
        """Return ``MASK`` for card-like text, else ``text`` with Dataverse secrets masked."""
        if self.VALUES_RE.search(text):
            return self.MASK
        return self.DATAVERSE_SECRET_RE.sub(self.MASK, text)

    def _is_sensitive_key(self, key):
        """Return True when the lower-cased ``key`` contains any name in ``FIELDS``."""
        # key can be a NoneType
        if not key:
            return False

        # Just in case we have bytes here, we want to turn them into text
        # properly without failing so we can perform our check.
        if isinstance(key, bytes):
            key = key.decode('utf-8', 'replace')
        else:
            key = str(key)

        key = key.lower()
        return any(field in key for field in self.FIELDS)

0 commit comments

Comments
 (0)