|
1 | 1 | from contextlib import contextmanager |
| 2 | +import re |
2 | 3 | from typing import Any |
3 | 4 | from typing import Callable |
4 | 5 | from typing import ContextManager |
5 | 6 | from typing import Generator |
6 | 7 | from typing import Optional |
| 8 | +from typing import Tuple |
7 | 9 | from typing import Union |
8 | 10 |
|
9 | 11 | from ddtrace.internal import compat |
@@ -40,6 +42,60 @@ def strip_query_string(url): |
40 | 42 | return h + fs + f |
41 | 43 |
|
42 | 44 |
|
def redact_query_string(query_string, query_string_obfuscation_pattern):
    # type: (str, Optional[re.Pattern]) -> Union[bytes, str]
    """Substitute every match of the obfuscation pattern in a query string.

    :param query_string: The query string to process; ``bytes`` input is used
        as-is, anything else is encoded to UTF-8 before matching.
    :param query_string_obfuscation_pattern: Compiled pattern whose matches
        are replaced, or ``None`` to disable obfuscation.
    :returns: ``query_string`` itself when the pattern is ``None``; otherwise
        the result of replacing each match with ``b"<redacted>"``.
    """
    pattern = query_string_obfuscation_pattern
    if pattern is None:
        # Obfuscation disabled: hand the input back untouched (no encoding).
        return query_string

    if isinstance(query_string, bytes):
        raw_query = query_string
    else:
        raw_query = query_string.encode("utf-8")
    return pattern.sub(b"<redacted>", raw_query)
| 52 | + |
| 53 | + |
def redact_url(url, query_string_obfuscation_pattern, query_string=None):
    # type: (str, re.Pattern, Optional[str]) -> Union[str,bytes]
    """Return ``url`` with its query component passed through the obfuscation pattern.

    :param url: The URL to process.
    :param query_string_obfuscation_pattern: Compiled pattern handed to
        ``redact_query_string``; ``None`` disables obfuscation.
    :param query_string: Optional query string. When truthy it is redacted and
        used in place of the query parsed out of ``url``.
    :returns: ``url`` unchanged when obfuscation is disabled or there is no
        query to redact; otherwise the URL reassembled by ``urlunsplit`` from
        UTF-8 encoded components, with the redacted query substituted.
    """
    # Avoid further processing if obfuscation is disabled
    if query_string_obfuscation_pattern is None:
        return url

    parsed = compat.parse.urlparse(url)

    # An explicitly supplied query string wins over the one embedded in the URL.
    source_query = query_string or parsed.query
    if not source_query:
        # Nothing to obfuscate, return original url
        return url

    redacted_query = redact_query_string(source_query, query_string_obfuscation_pattern)
    if len(parsed) < 5:
        return url

    # Swap the query slot (index 4) for the redacted value, then make every
    # component bytes so they can be joined by the bytes-based urlunsplit.
    encoded_parts = []
    for piece in parsed[:4] + (redacted_query,) + parsed[5:]:
        encoded_parts.append(piece if isinstance(piece, bytes) else piece.encode("utf-8"))
    return urlunsplit(tuple(encoded_parts), url)
| 76 | + |
| 77 | + |
def urlunsplit(components, original_url):
    # type: (Tuple[bytes, ...], Union[bytes, str]) -> bytes
    """
    Adaptation from urlunsplit and urlunparse, using bytes components

    :param components: Six bytes values, in order: scheme, netloc, path,
        params, query, fragment.
    :param original_url: The URL the components came from (text or bytes).
        Only its last character is inspected, so that a trailing bare ``?``
        or ``#`` is kept even when the query or fragment is empty.
    :returns: The reassembled URL as bytes.
    :raises ValueError: If ``components`` does not hold exactly six items.
    """
    scheme, netloc, url, params, query, fragment = components

    # Take the last character as a one-element slice: indexing bytes with
    # [-1] yields an int on Python 3, which would never equal "?" or "#".
    tail = original_url[-1:] if original_url else None
    # Compare against delimiters of the same type as the original URL to
    # avoid mixed str/bytes comparisons.
    if isinstance(tail, bytes):
        query_mark, fragment_mark = b"?", b"#"
    else:
        query_mark, fragment_mark = "?", "#"

    if params:
        url = b"%s;%s" % (url, params)
    if netloc or (scheme and url[:2] != b"//"):
        if url and url[:1] != b"/":
            url = b"/" + url
        url = b"//%s%s" % ((netloc or b""), url)
    if scheme:
        url = b"%s:%s" % (scheme, url)
    if query or tail == query_mark:
        url = b"%s?%s" % (url, query)
    if fragment or tail == fragment_mark:
        url = b"%s#%s" % (url, fragment)
    return url
| 97 | + |
| 98 | + |
43 | 99 | def connector(url, **kwargs): |
44 | 100 | # type: (str, Any) -> Connector |
45 | 101 | """Create a connector context manager for the given URL. |
|
0 commit comments