Skip to content

Commit b776bc8

Browse files
Add sanitize nested lists and dictionaries
1 parent 58ef57f commit b776bc8

File tree

2 files changed

+147
-18
lines changed

2 files changed

+147
-18
lines changed

tests/server/test_sanitize.py

Lines changed: 136 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,36 @@ def test_fields_sanitized(self, sanitizer):
2626
def test_value_is_none(self, sanitizer):
2727
assert sanitizer.sanitize('great hair', None) is None
2828

29+
def test_key_is_none(self, sanitizer):
    """A harmless value passed with a ``None`` key comes back unchanged."""
    # Compare with `==`, not `is`: identity against a str literal only holds
    # when the very same object is returned, and CPython 3.8+ emits a
    # SyntaxWarning for `is` with a literal.
    assert sanitizer.sanitize(None, 'best day ever') == 'best day ever'
31+
2932
def test_sanitize_credit_card(self, sanitizer):
3033
assert sanitizer.sanitize('credit', '424242424242424') == self.MASK
34+
# This string is not censored since it is out of the range of what it considers
35+
# to be a credit card
3136
assert sanitizer.sanitize('credit', '4242424242424243333333') != self.MASK
3237

38+
def test_none_key_is_sanitized(self, sanitizer):
39+
assert sanitizer.sanitize(None, '424242424242424') == self.MASK
40+
# This string is not censored since it is out of the range of what it considers
41+
# to be a credit card
42+
assert sanitizer.sanitize(None, '4242424242424243333333') != self.MASK
43+
44+
def test_dataverse_secret(self, sanitizer):
45+
46+
# Named oddly because if you call it `dv_secret` it will get sanitized by a different
47+
# part of the sanitizer
48+
dv_value = 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
49+
assert sanitizer.sanitize('dv_value', dv_value) == self.MASK
50+
51+
dv_value = 'random characters and other things aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
52+
expected = 'random characters and other things ' + self.MASK
53+
assert sanitizer.sanitize('dv_value', dv_value) == expected
54+
55+
def test_bytes(self, sanitizer):
56+
assert sanitizer.sanitize(b'key', 'bossy yogurt') == self.MASK
57+
assert sanitizer.sanitize(b'should_be_safe', 'snow science') == 'snow science'
58+
3359
def test_sanitize_dictionary(self, sanitizer):
3460
value_dict = {
3561
'great_entry': 'very much not a secret or credit card'
@@ -44,7 +70,7 @@ def test_sanitize_dictionary(self, sanitizer):
4470
'key': 'secret',
4571
'okay_value': 'bears are awesome'
4672
}
47-
result = result = sanitizer.sanitize('sanitize_dict', sanitize_dict)
73+
result = sanitizer.sanitize('sanitize_dict', sanitize_dict)
4874

4975
# Sanity check
5076
assert result != {
@@ -53,24 +79,118 @@ def test_sanitize_dictionary(self, sanitizer):
5379
}
5480

5581
assert result == {
56-
'key': '*' * 8,
82+
'key': self.MASK,
5783
'okay_value': 'bears are awesome'
5884
}
5985

60-
def test_dataverse_secret(self, sanitizer):
61-
62-
# Named oddly because if you call it `dv_secret` it will get sanitized by a different
63-
# part of the sanitizer
64-
dv_value = 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
65-
assert sanitizer.sanitize('dv_value', dv_value) == self.MASK
86+
def test_nested_dictionary(self, sanitizer):
87+
value_dict = {
88+
'value': {
89+
'other': 'words',
90+
'key': 'this will be censored',
91+
'secret': {
92+
'secret': {
93+
'secret': 'pie is great'
94+
}
95+
},
96+
'new': 'best'
97+
}
98+
}
6699

67-
dv_value = 'random characters and other things aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
68-
expected = 'random characters and other things ' + self.MASK
69-
assert sanitizer.sanitize('dv_value', dv_value) == expected
100+
result = sanitizer.sanitize('value_dict', value_dict)
101+
assert result == {
102+
'value': {
103+
'other': 'words',
104+
'key': self.MASK,
105+
'secret': self.MASK,
106+
'new': 'best'
107+
}
108+
}
70109

71-
def test_bytes(self, sanitizer):
72-
key = b'key'
73-
assert sanitizer.sanitize(key, 'bossy yogurt') == self.MASK
110+
def test_nested_dictionary_with_list(self, sanitizer):
111+
value_dict = {
112+
'value': {
113+
'other': 'words',
114+
'key': 'this will be censored',
115+
'secret': {
116+
'value': ['bunch', 'of', 'semi', 'random', 'beige', 'run']
117+
118+
},
119+
'not_hidden': {
120+
'list_of_dict': [
121+
{'value': 'value'},
122+
{'key': 'secret'}
123+
]
124+
},
125+
'new': 'best'
126+
}
127+
}
128+
result = sanitizer.sanitize('value_dict', value_dict)
129+
assert result == {
130+
'value': {
131+
'other': 'words',
132+
'key': self.MASK,
133+
'secret': self.MASK,
134+
'not_hidden': {
135+
'list_of_dict': [
136+
{'value': 'value'},
137+
{'key': self.MASK}
138+
]
139+
},
140+
'new': 'best'
141+
}
142+
}
74143

75-
other_key = b'should_be_safe'
76-
assert sanitizer.sanitize(other_key, 'snow science') == 'snow science'
144+
def test_sanitize_list(self, sanitizer):
145+
value_list = [
146+
'blarg',
147+
'10',
148+
'key',
149+
'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
150+
]
151+
152+
result = sanitizer.sanitize('value_list', value_list)
153+
154+
assert result == [
155+
'blarg',
156+
'10',
157+
'key',
158+
self.MASK
159+
]
160+
161+
def test_sanitize_nested_lists(self, sanitizer):
162+
value_list = [
163+
[
164+
'blarg',
165+
'10',
166+
'key',
167+
'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
168+
],
169+
'blarg',
170+
'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc',
171+
[[[[[[[
172+
['check out this level of nested'], 'aaaaaaaa-bbbb-bbbb-bbbb-cccccccccccc'
173+
]]]]]]],
174+
{
175+
'key': 'red leaves',
176+
'secret': [[[[[[[[]]]]]]]]
177+
}
178+
]
179+
180+
result = sanitizer.sanitize('value_list', value_list)
181+
182+
assert result == [
183+
[
184+
'blarg',
185+
'10',
186+
'key',
187+
self.MASK
188+
],
189+
'blarg',
190+
self.MASK,
191+
[[[[[[[['check out this level of nested'], self.MASK]]]]]]],
192+
{
193+
'key': self.MASK,
194+
'secret': self.MASK
195+
}
196+
]

waterbutler/server/sanitize.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class WBSanitizer(SanitizePasswordsProcessor):
3434

3535
def sanitize(self, key, value):
3636
"""Overload the sanitize function of the `SanitizePasswordsProcessor'."""
37+
3738
if value is None:
3839
return
3940

@@ -43,8 +44,14 @@ def sanitize(self, key, value):
4344

4445
if isinstance(value, dict):
4546
for item in value:
46-
if item in self.FIELDS:
47-
value[item] = self.MASK
47+
value[item] = self.sanitize(item, value[item])
48+
49+
if isinstance(value, list):
50+
new_list = []
51+
for item in value:
52+
new_list.append(self.sanitize(key, item))
53+
54+
value = new_list
4855

4956
# Check for Dataverse secrets
5057
if isinstance(value, str):
@@ -53,12 +60,14 @@ def sanitize(self, key, value):
5360
value = value.replace(match, self.MASK)
5461

5562
# key can be a NoneType
63+
# This should be after the regex checks in case a `None` key is a token
5664
if not key:
5765
return value
5866

5967
# Just in case we have bytes here, we want to turn them into text
6068
# properly without failing so we can perform our check.
6169
if isinstance(key, bytes):
70+
# May want a try/except block around this, but for now it should be okay
6271
key = key.decode('utf-8', 'replace')
6372
else:
6473
key = str(key)

0 commit comments

Comments
 (0)