Skip to content

Commit 2349c0e

Browse files
authored
Add support for HTTP secrets (#107)
* Add support for HTTP secrets (and tests) * Set defaults for secrets in sensor * Apply secrets to HTTP instrumentation * Report separated params * More safeties & test cases * Add safeties against bad matchers and keyword lists * Add exception handling
1 parent 801c549 commit 2349c0e

File tree

10 files changed

+300
-13
lines changed

10 files changed

+300
-13
lines changed

instana/agent.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ class Agent(object):
3434
last_fork_check = None
3535
_boot_pid = os.getpid()
3636
extra_headers = None
37+
secrets_matcher = 'contains-ignore-case'
38+
secrets_list = ['key', 'password', 'secret']
3739
client = requests.Session()
3840

3941
def __init__(self):
@@ -69,7 +71,7 @@ def can_send(self):
6971
self.handle_fork()
7072
return False
7173

72-
if (self.fsm.fsm.current == "good2go"):
74+
if self.fsm.fsm.current == "good2go":
7375
return True
7476

7577
return False
@@ -82,6 +84,10 @@ def set_from(self, json_string):
8284

8385
res_data = json.loads(raw_json)
8486

87+
if "secrets" in res_data:
88+
self.secrets_matcher = res_data['secrets']['matcher']
89+
self.secrets_list = res_data['secrets']['list']
90+
8591
if "extraHeaders" in res_data:
8692
self.extra_headers = res_data['extraHeaders']
8793
logger.info("Will also capture these custom headers: %s", self.extra_headers)

instana/instrumentation/django/middleware.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from ...log import logger
1010
from ...singletons import agent, tracer
11+
from ...util import strip_secrets
1112

1213
DJ_INSTANA_MIDDLEWARE = 'instana.instrumentation.django.middleware.InstanaMiddleware'
1314

@@ -44,7 +45,8 @@ def process_request(self, request):
4445
if 'PATH_INFO' in env:
4546
request.iscope.span.set_tag(ext.HTTP_URL, env['PATH_INFO'])
4647
if 'QUERY_STRING' in env and len(env['QUERY_STRING']):
47-
request.iscope.span.set_tag("http.params", env['QUERY_STRING'])
48+
scrubbed_params = strip_secrets(env['QUERY_STRING'], agent.secrets_matcher, agent.secrets_list)
49+
request.iscope.span.set_tag("http.params", scrubbed_params)
4850
if 'HTTP_HOST' in env:
4951
request.iscope.span.set_tag("http.host", env['HTTP_HOST'])
5052
except Exception:

instana/instrumentation/urllib3.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
import wrapt
66

77
from ..log import logger
8-
from ..singletons import tracer
8+
from ..singletons import agent, tracer
9+
from ..util import strip_secrets
910

1011
try:
1112
import urllib3 # noqa
@@ -20,13 +21,16 @@ def collect(instance, args, kwargs):
2021

2122
if args is not None and len(args) is 2:
2223
kvs['method'] = args[0]
23-
kvs['path'] = args[1]
24+
kvs['path'] = strip_secrets(args[1], agent.secrets_matcher, agent.secrets_list)
2425
else:
2526
kvs['method'] = kwargs.get('method')
2627
kvs['path'] = kwargs.get('path')
2728
if kvs['path'] is None:
2829
kvs['path'] = kwargs.get('url')
2930

31+
# Strip any secrets from potential query params
32+
kvs['path'] = strip_secrets(kvs['path'], agent.secrets_matcher, agent.secrets_list)
33+
3034
if type(instance) is urllib3.connectionpool.HTTPSConnectionPool:
3135
kvs['url'] = 'https://%s:%d%s' % (kvs['host'], kvs['port'], kvs['path'])
3236
else:

instana/json_span.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def __init__(self, **kwds):
4343
class HttpData(object):
4444
host = None
4545
url = None
46+
params = None
4647
status = 0
4748
method = None
4849
path_tpl = None

instana/recorder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def build_registered_span(self, span):
104104
if span.operation_name in self.http_spans:
105105
data.http = HttpData(host=self.get_http_host_name(span),
106106
url=span.tags.pop(ext.HTTP_URL, None),
107+
params=span.tags.pop('http.params', None),
107108
method=span.tags.pop(ext.HTTP_METHOD, None),
108109
status=span.tags.pop(ext.HTTP_STATUS_CODE, None),
109110
path_tpl=span.tags.pop("http.path_tpl", None),
@@ -129,7 +130,6 @@ def build_registered_span(self, span):
129130
url=span.tags.pop('sqlalchemy.url', None),
130131
err=span.tags.pop('sqlalchemy.err', None))
131132

132-
133133
if span.operation_name == "soap":
134134
data.soap = SoapData(action=span.tags.pop('soap.action', None))
135135

instana/singletons.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@
1717
#
1818
tracer = InstanaTracer()
1919

20-
# Set ourselves as the tracer.
20+
# Set ourselves as the tracer.
2121
opentracing.tracer = tracer

instana/util.py

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88
import time
99

1010
import pkg_resources
11+
from urllib import parse
1112

1213
from .log import logger
1314

15+
1416
if sys.version_info.major is 2:
1517
string_types = basestring
1618
else:
@@ -28,7 +30,7 @@ def generate_id():
2830
global _current_pid
2931

3032
pid = os.getpid()
31-
if (_current_pid != pid):
33+
if _current_pid != pid:
3234
_current_pid = pid
3335
_rnd.seed(int(1000000 * time.time()) ^ pid)
3436
return _rnd.randint(-9223372036854775808, 9223372036854775807)
@@ -41,8 +43,8 @@ def id_to_header(id):
4143
if not isinstance(id, int):
4244
return BAD_ID_HEADER
4345

44-
byteString = struct.pack('>q', id)
45-
return str(binascii.hexlify(byteString).decode('UTF-8').lstrip('0'))
46+
byte_string = struct.pack('>q', id)
47+
return str(binascii.hexlify(byte_string).decode('UTF-8').lstrip('0'))
4648
except Exception as e:
4749
logger.debug(e)
4850
return BAD_ID_HEADER
@@ -75,15 +77,90 @@ def to_json(obj):
7577

7678

7779
def package_version():
    """
    Look up the version of the installed 'instana' distribution.

    :return: the version string reported by pkg_resources, 'unknown' when the
             distribution cannot be found, or an empty string when the lookup
             fails for any other reason
    """
    # NOTE: the previous implementation returned from a `finally` clause,
    # which silently discards *every* in-flight exception (including
    # KeyboardInterrupt and SystemExit).  Explicit handlers keep the same
    # return values for ordinary failures without swallowing those.
    try:
        return pkg_resources.get_distribution('instana').version
    except pkg_resources.DistributionNotFound:
        return 'unknown'
    except Exception:
        logger.debug("package_version", exc_info=True)
        return ""
8587

8688

89+
def strip_secrets(qp, matcher, kwlist):
    """
    Scrub secret values from a query param string based on the passed in
    matcher and kwlist.

    blah=1&secret=password&valid=true will result in blah=1&secret=<redacted>&valid=true

    You can even pass in path query combinations:

    /signup?blah=1&secret=password&valid=true will result in /signup?blah=1&secret=<redacted>&valid=true

    Only the values of matching keys are replaced; every other character of
    the input (parameter order, duplicate keys, percent-encoding) is returned
    untouched.

    :param qp: a string holding the query params, optionally prefixed with
               a path and '?'
    :param matcher: the matcher to use; one of 'equals', 'equals-ignore-case',
                    'contains', 'contains-ignore-case' or 'regex'
    :param kwlist: the list of keywords (or regular expressions for the
                   'regex' matcher) to match against parameter names
    :return: the scrubbed string; '' when qp is None; qp unchanged when the
             keyword list or matcher is invalid or there is nothing to scrub;
             '<redacted>' when scrubbing fails unexpectedly
    """
    redacted = '<redacted>'

    try:
        if qp is None:
            return ''

        if not isinstance(kwlist, list):
            logger.debug("strip_secrets: bad keyword list")
            return qp

        # If there are no key=values, then just return
        if '=' not in qp:
            return qp

        # Build the key predicate once, up front, instead of re-deciding the
        # matcher for every keyword/key combination.
        if matcher == 'equals-ignore-case':
            lowered = [keyword.lower() for keyword in kwlist]

            def is_secret(key):
                return key.lower() in lowered
        elif matcher == 'equals':
            def is_secret(key):
                return key in kwlist
        elif matcher == 'contains-ignore-case':
            lowered = [keyword.lower() for keyword in kwlist]

            def is_secret(key):
                key = key.lower()
                return any(keyword in key for keyword in lowered)
        elif matcher == 'contains':
            def is_secret(key):
                return any(keyword in key for keyword in kwlist)
        elif matcher == 'regex':
            patterns = [re.compile(regexp) for regexp in kwlist]

            def is_secret(key):
                return any(pattern.match(key) for pattern in patterns)
        else:
            logger.debug("strip_secrets: unknown matcher")
            return qp

        # Split on the FIRST '?' only: query values may legitimately contain
        # further '?' characters (e.g. a nested redirect URL).
        prefix, sep, query = qp.partition('?')
        if sep:
            prefix += sep
        else:
            prefix, query = '', qp

        scrubbed = []
        for pair in query.split('&'):
            key, eq, _ = pair.partition('=')
            # Decode the key for matching only, so percent-encoded parameter
            # names cannot dodge the keyword list; the raw key is what gets
            # written back out.
            if eq and is_secret(parse.unquote_plus(key)):
                pair = key + eq + redacted
            scrubbed.append(pair)

        return prefix + '&'.join(scrubbed)
    except Exception:
        # Fail closed: never hand back unscrubbed input (or None) when the
        # scrubbing itself blew up, e.g. on an invalid regular expression.
        logger.debug("strip_secrets", exc_info=True)
        return redacted
163+
87164
def get_py_source(file):
88165
"""
89166
Retrieves and returns the source code for any Python

instana/wsgi.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import opentracing.ext.tags as tags
55

66
from .singletons import agent, tracer
7+
from .util import strip_secrets
78

89

910
class iWSGIMiddleware(object):
@@ -47,7 +48,8 @@ def new_start_response(status, headers, exc_info=None):
4748
if 'PATH_INFO' in env:
4849
self.scope.span.set_tag(tags.HTTP_URL, env['PATH_INFO'])
4950
if 'QUERY_STRING' in env and len(env['QUERY_STRING']):
50-
self.scope.span.set_tag("http.params", env['QUERY_STRING'])
51+
scrubbed_params = strip_secrets(env['QUERY_STRING'], agent.secrets_matcher, agent.secrets_list)
52+
self.scope.span.set_tag("http.params", scrubbed_params)
5153
if 'REQUEST_METHOD' in env:
5254
self.scope.span.set_tag(tags.HTTP_METHOD, env['REQUEST_METHOD'])
5355
if 'HTTP_HOST' in env:

0 commit comments

Comments
 (0)