Skip to content

Commit 09156b9

Browse files
committed
Redact specific url query string values and url credentials by default in instrumentations
1 parent 4a1e0ce commit 09156b9

File tree

3 files changed

+114
-15
lines changed

3 files changed

+114
-15
lines changed

util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
SpanAttributes.HTTP_SERVER_NAME,
5959
}
6060

61+
# Query string keys whose values redact_query_parameters replaces with
# "REDACTED". Names are compared by exact list membership (case-sensitive).
PARAMS_TO_REDACT = ["AWSAccessKeyId", "Signature", "sig", "X-Goog-Signature"]
6162

6263
class ExcludeList:
6364
"""Class to exclude certain paths (given as a list of regexes) from tracing requests"""
@@ -160,22 +161,23 @@ def parse_excluded_urls(excluded_urls: str) -> ExcludeList:
160161

161162
def remove_url_credentials(url: str) -> str:
    """Redact the username and password of a url, only if it is a valid url.

    Userinfo in the network location is replaced with the placeholder
    ``REDACTED:REDACTED`` instead of being dropped, as recommended by the
    OpenTelemetry semantic conventions for HTTP spans.

    Args:
        url: The url string to sanitize.

    Returns:
        The url with its credentials replaced by ``REDACTED:REDACTED``. The
        input is returned unchanged when it has no scheme or network
        location, carries no userinfo, or cannot be parsed.
    """
    try:
        parsed = urlparse(url)
    except ValueError:  # an unparsable url was passed
        return url

    if not (parsed.scheme and parsed.netloc):  # checks for valid url
        return url
    if "@" not in parsed.netloc:  # no credentials to redact
        return url

    # Split on the last "@" so that an "@" inside the userinfo cannot leak
    # part of the credentials into the host.
    _, _, host = parsed.netloc.rpartition("@")
    return urlunparse(parsed._replace(netloc="REDACTED:REDACTED@" + host))
@@ -255,3 +257,43 @@ def _parse_url_query(url: str):
255257
path = parsed_url.path
256258
query_params = parsed_url.query
257259
return path, query_params
260+
261+
def redact_query_parameters(url: str) -> str:
    """Given a string url, redact sensitive query parameter values.

    The query string is split on ``&``. For every ``name=value`` pair whose
    name is listed in ``PARAMS_TO_REDACT`` the value is replaced with
    ``REDACTED``; the name itself is preserved. All other parts, including
    parameters without a value, are kept as they are.

    Args:
        url: The url string to sanitize.

    Returns:
        The url with sensitive query values redacted. The input string is
        returned unchanged when it has no query string, when no parameter
        name matches, or when the url cannot be parsed.
    """
    try:
        parsed = urlparse(url)
    except ValueError:  # an unparsable url was passed
        return url

    if not parsed.query:  # No query parameters to redact
        return url

    redacted_any = False
    query_parts = []
    for query_part in parsed.query.split("&"):
        param_name, separator, _ = query_part.partition("=")
        # Compare the whole parameter name; a substring test would treat
        # e.g. "config=" as a match for "sig=".
        if separator and param_name in PARAMS_TO_REDACT:
            query_parts.append(f"{param_name}=REDACTED")
            redacted_any = True
        else:
            query_parts.append(query_part)

    if not redacted_any:
        # Hand back the caller's string untouched instead of a rebuilt
        # (and possibly normalised) copy.
        return url

    # Reconstruct the URL with redacted query parameters
    return urlunparse(parsed._replace(query="&".join(query_parts)))
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import unittest
2+
from opentelemetry.util.http import redact_query_parameters
3+
4+
class TestRedactSensitiveInfo(unittest.TestCase):
    """Tests for ``redact_query_parameters``."""

    def test_redact_goog_signature(self):
        url = "https://www.example.com/path?color=blue&X-Goog-Signature=secret"
        self.assertEqual(
            redact_query_parameters(url),
            "https://www.example.com/path?color=blue&X-Goog-Signature=REDACTED",
        )

    def test_no_redaction_needed(self):
        url = "https://www.example.com/path?color=blue&query=secret"
        self.assertEqual(
            redact_query_parameters(url),
            "https://www.example.com/path?color=blue&query=secret",
        )

    def test_no_query_parameters(self):
        url = "https://www.example.com/path"
        self.assertEqual(
            redact_query_parameters(url), "https://www.example.com/path"
        )

    def test_empty_query_string(self):
        url = "https://www.example.com/path?"
        self.assertEqual(
            redact_query_parameters(url), "https://www.example.com/path?"
        )

    def test_empty_url(self):
        url = ""
        self.assertEqual(redact_query_parameters(url), "")

    def test_redact_aws_access_key_id(self):
        url = "https://www.example.com/path?color=blue&AWSAccessKeyId=secrets"
        self.assertEqual(
            redact_query_parameters(url),
            "https://www.example.com/path?color=blue&AWSAccessKeyId=REDACTED",
        )

    def test_api_key_not_in_redact_list(self):
        # Keys outside the redact list must leave the url exactly as given;
        # asserting equality also catches any other accidental rewrite.
        url = "https://www.example.com/path?api_key=secret%20key&user=john"
        self.assertEqual(redact_query_parameters(url), url)

    def test_password_key_not_in_redact_list(self):
        # Same as above: neither "key" nor "password" is in the redact list.
        url = "https://api.example.com?key=abc&password=123&user=admin"
        self.assertEqual(redact_query_parameters(url), url)

    def test_url_with_at_symbol_in_path_and_query(self):
        url = "https://github.com/p@th?foo=b@r"
        self.assertEqual(
            redact_query_parameters(url), "https://github.com/p@th?foo=b@r"
        )

    def test_aws_access_key_with_real_format(self):
        url = "https://microsoft.com?AWSAccessKeyId=AKIAIOSFODNN7"
        self.assertEqual(
            redact_query_parameters(url),
            "https://microsoft.com?AWSAccessKeyId=REDACTED",
        )

    def test_signature_parameter(self):
        url = "https://service.com?sig=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0"
        self.assertEqual(
            redact_query_parameters(url), "https://service.com?sig=REDACTED"
        )

    def test_signature_with_url_encoding(self):
        url = "https://service.com?Signature=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0%3A377"
        self.assertEqual(
            redact_query_parameters(url),
            "https://service.com?Signature=REDACTED",
        )

util/opentelemetry-util-http/tests/test_remove_credentials.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,25 @@ def test_remove_credentials(self):
1313
url = "http://someuser:somepass@opentelemetry.io:8080/test/path?query=value"
1414
cleaned_url = remove_url_credentials(url)
1515
self.assertEqual(
16-
cleaned_url, "http://opentelemetry.io:8080/test/path?query=value"
16+
cleaned_url, "http://REDACTED:REDACTED@opentelemetry.io:8080/test/path?query=value"
1717
)
1818

1919
def test_remove_credentials_ipv4_literal(self):
    # Credentials in front of an IPv4 literal host are replaced with the
    # REDACTED placeholder; host, port, path and query stay intact.
    url = "http://someuser:somepass@127.0.0.1:8080/test/path?query=value"
    cleaned_url = remove_url_credentials(url)
    self.assertEqual(
        cleaned_url,
        "http://REDACTED:REDACTED@127.0.0.1:8080/test/path?query=value",
    )
2525

2626
def test_remove_credentials_ipv6_literal(self):
    # Credentials in front of a bracketed IPv6 literal host are replaced
    # with the REDACTED placeholder; the bracketed host and port stay intact.
    url = "http://someuser:somepass@[::1]:8080/test/path?query=value"
    cleaned_url = remove_url_credentials(url)
    self.assertEqual(
        cleaned_url,
        "http://REDACTED:REDACTED@[::1]:8080/test/path?query=value",
    )
32+
33+
def test_empty_url(self):
    # An empty string is not a valid url and must come back untouched.
    empty = ""
    self.assertEqual(remove_url_credentials(empty), empty)
37+

0 commit comments

Comments
 (0)