Skip to content

Commit be36ebb

Browse files
majorgreys authored and brettlangdon committed
[httplib] [requests] Sanitize urls in span metadata (#688)
* [httplib] Strip all but path from url * [httplib] Fix tests * [requests] Sanitize url * [httplib] Add comment * Correct comment * Make httplib and requests consistent
1 parent 8dab248 commit be36ebb

File tree

5 files changed

+53
-20
lines changed

5 files changed

+53
-20
lines changed

ddtrace/compat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import sys
21
import platform
2+
import sys
33

44
PYTHON_VERSION_INFO = sys.version_info
55
PY2 = sys.version_info[0] == 2

ddtrace/contrib/httplib/patch.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@
55
import wrapt
66

77
# Project
8-
from ...compat import httplib, PY2
8+
from ...compat import PY2, httplib, parse
99
from ...ext import http as ext_http
1010
from ...pin import Pin
1111
from ...utils.wrappers import unwrap as _u
1212

13-
1413
span_name = 'httplib.request' if PY2 else 'http.client.request'
1514

1615
log = logging.getLogger(__name__)
@@ -60,10 +59,23 @@ def _wrap_putrequest(func, instance, args, kwargs):
6059
method, path = args[:2]
6160
scheme = 'https' if isinstance(instance, httplib.HTTPSConnection) else 'http'
6261
port = ':{port}'.format(port=instance.port)
62+
6363
if (scheme == 'http' and instance.port == 80) or (scheme == 'https' and instance.port == 443):
6464
port = ''
6565
url = '{scheme}://{host}{port}{path}'.format(scheme=scheme, host=instance.host, port=port, path=path)
66-
span.set_tag(ext_http.URL, url)
66+
67+
# sanitize url
68+
parsed = parse.urlparse(url)
69+
sanitized_url = parse.urlunparse((
70+
parsed.scheme,
71+
parsed.netloc,
72+
parsed.path,
73+
parsed.params,
74+
None, # drop query
75+
parsed.fragment
76+
))
77+
78+
span.set_tag(ext_http.URL, sanitized_url)
6779
span.set_tag(ext_http.METHOD, method)
6880
except Exception:
6981
log.debug('error applying request tags', exc_info=True)

ddtrace/contrib/requests/connection.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
import logging
2-
import ddtrace
32

3+
import ddtrace
44
from ddtrace import config
55

6-
from .constants import DEFAULT_SERVICE
7-
8-
from ...ext import http
96
from ...compat import parse
7+
from ...ext import http
108
from ...propagation.http import HTTPPropagator
11-
9+
from .constants import DEFAULT_SERVICE
1210

1311
log = logging.getLogger(__name__)
1412

@@ -55,6 +53,15 @@ def _wrap_request(func, instance, args, kwargs):
5553
url = kwargs.get('url') or args[1]
5654
headers = kwargs.get('headers', {})
5755
parsed_uri = parse.urlparse(url)
56+
# sanitize url of query
57+
sanitized_url = parse.urlunparse((
58+
parsed_uri.scheme,
59+
parsed_uri.netloc,
60+
parsed_uri.path,
61+
parsed_uri.params,
62+
None, # drop parsed_uri.query
63+
parsed_uri.fragment
64+
))
5865

5966
with tracer.trace("requests.request", span_type=http.TYPE) as span:
6067
# update the span service name before doing any action
@@ -76,7 +83,7 @@ def _wrap_request(func, instance, args, kwargs):
7683
finally:
7784
try:
7885
span.set_tag(http.METHOD, method.upper())
79-
span.set_tag(http.URL, url)
86+
span.set_tag(http.URL, sanitized_url)
8087
if response is not None:
8188
span.set_tag(http.STATUS_CODE, response.status_code)
8289
# `span.error` must be an integer

tests/contrib/httplib/test_httplib.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,15 @@
77
import wrapt
88

99
# Project
10-
from ddtrace.compat import httplib, PY2
10+
from ddtrace.compat import PY2, httplib
1111
from ddtrace.contrib.httplib import patch, unpatch
1212
from ddtrace.contrib.httplib.patch import should_skip_request
1313
from ddtrace.pin import Pin
14-
1514
from tests.opentracer.utils import init_tracer
15+
1616
from ...test_tracer import get_dummy_tracer
1717
from ...util import assert_dict_issuperset, override_global_tracer
1818

19-
2019
if PY2:
2120
from urllib2 import urlopen, build_opener, Request
2221
else:
@@ -221,7 +220,7 @@ def test_httplib_request_post_request(self):
221220
def test_httplib_request_get_request_query_string(self):
222221
"""
223222
When making a GET request with a query string via httplib.HTTPConnection.request
224-
we capture a the entire url in the span
223+
we capture all of the url in the span except for the query string
225224
"""
226225
conn = self.get_http_connection(SOCKET)
227226
with contextlib.closing(conn):
@@ -242,7 +241,8 @@ def test_httplib_request_get_request_query_string(self):
242241
{
243242
'http.method': 'GET',
244243
'http.status_code': '200',
245-
'http.url': '{}?key=value&key2=value2'.format(URL_200),
244+
# check url metadata lacks query string
245+
'http.url': '{}'.format(URL_200),
246246
}
247247
)
248248

tests/contrib/requests/test_requests.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import unittest
2-
import requests
32

3+
import requests
44
from requests import Session
55
from requests.exceptions import MissingSchema
6-
from nose.tools import eq_, assert_raises
76

87
from ddtrace import config
9-
from ddtrace.ext import http, errors
108
from ddtrace.contrib.requests import patch, unpatch
11-
9+
from ddtrace.ext import errors, http
10+
from nose.tools import assert_raises, eq_
1211
from tests.opentracer.utils import init_tracer
13-
from ...util import override_global_tracer
12+
1413
from ...test_tracer import get_dummy_tracer
14+
from ...util import override_global_tracer
1515

1616
# socket name comes from https://english.stackexchange.com/a/44048
1717
SOCKET = 'httpbin.org'
@@ -105,6 +105,20 @@ def test_200(self):
105105
eq_(s.error, 0)
106106
eq_(s.span_type, http.TYPE)
107107

108+
def test_200_query_string(self):
109+
# ensure query string is removed before adding url to metadata
110+
out = self.session.get(URL_200 + '?key=value&key2=value2')
111+
eq_(out.status_code, 200)
112+
# validation
113+
spans = self.tracer.writer.pop()
114+
eq_(len(spans), 1)
115+
s = spans[0]
116+
eq_(s.get_tag(http.METHOD), 'GET')
117+
eq_(s.get_tag(http.STATUS_CODE), '200')
118+
eq_(s.get_tag(http.URL), URL_200)
119+
eq_(s.error, 0)
120+
eq_(s.span_type, http.TYPE)
121+
108122
def test_requests_module_200(self):
109123
# ensure the requests API is instrumented even without
110124
# using a `Session` directly

0 commit comments

Comments
 (0)