Skip to content

Commit 371e67b

Browse files
fix(profiling): detect and correct non-string Location fields [backport 1.20] (#7010)
Backport 262a336 from #6675 to 1.20.

Fixes #6652 by skipping objects that are not strings. This appears to be behavior specific to Python 3.11, similar to how we have to fence frame/code objects. We are still not 100% sure what causes this to happen, as we don't have a repro case.

The sanitization procedure _could_ be special-cased to 3.11 or later, as we have no evidence of this defect in earlier Python versions.

## Checklist
- [X] Change(s) are motivated and described in the PR description.
- [X] Testing strategy is described if automated tests are not included in the PR.
- [X] Risk is outlined (performance impact, potential for breakage, maintainability, etc).
- [X] Change is maintainable (easy to change, telemetry, documentation).
- [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`.
- [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)).
- [X] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Title is accurate.
- [x] No unnecessary changes are introduced.
- [x] Description motivates each change.
- [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary.
- [x] Testing strategy adequately addresses listed risk(s).
- [x] Change is maintainable (easy to change, telemetry, documentation).
- [x] Release note makes sense to a user of the library.
- [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment.
- [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

Co-authored-by: David Sanchez <[email protected]>
1 parent 2ee0834 commit 371e67b

File tree

5 files changed

+61
-9
lines changed

5 files changed

+61
-9
lines changed

ddtrace/internal/datadog/profiling/_ddup.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ from ddtrace.internal.compat import ensure_binary
88
from ddtrace.internal.constants import DEFAULT_SERVICE_NAME
99
from ddtrace.span import Span
1010

11+
from .utils import sanitize_string
12+
1113

1214
IF UNAME_SYSNAME == "Linux":
1315
cdef extern from "exporter.hpp":
@@ -115,8 +117,8 @@ IF UNAME_SYSNAME == "Linux":
115117
ddup_push_lock_name(ensure_binary(lock_name))
116118

117119
def push_frame(name: str, filename: str, address: int, line: int) -> None:
118-
name = name if name is not None else ""
119-
filename = filename if filename is not None else ""
120+
name = sanitize_string(name)
121+
filename = sanitize_string(filename)
120122
ddup_push_frame(ensure_binary(name), ensure_binary(filename), address, line)
121123

122124
def push_threadinfo(thread_id: int, thread_native_id: int, thread_name: Optional[str]) -> None:

ddtrace/internal/datadog/profiling/ddup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from .utils import sanitize_string # noqa: F401
2+
3+
14
try:
25
from ._ddup import * # noqa: F403, F401
36
except ImportError:
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from sys import version_info
2+
from typing import Any
3+
4+
from ddtrace.internal.logger import get_logger
5+
6+
7+
LOG = get_logger(__name__)
8+
9+
10+
# 3.11 and above
11+
def _sanitize_string_check(value):
12+
# type: (Any) -> str
13+
if isinstance(value, str):
14+
return value
15+
elif value is None:
16+
return ""
17+
try:
18+
return value.decode("utf-8", "ignore")
19+
except Exception:
20+
LOG.warning("Got object of type '%s' instead of str during profile serialization", type(value).__name__)
21+
return "[invalid string]%s" % type(value).__name__
22+
23+
24+
# 3.10 and below (the noop version)
25+
def _sanitize_string_identity(value):
26+
# type: (Any) -> str
27+
return value or ""
28+
29+
30+
# Assign based on version: the implementation is chosen once, at import time.
# Python 3.11 and later use the checking variant; older interpreters use the
# pass-through variant.
if version_info >= (3, 11):
    sanitize_string = _sanitize_string_check
else:
    sanitize_string = _sanitize_string_identity

ddtrace/profiling/exporter/pprof.pyx

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ from ddtrace import ext
1212
from ddtrace.internal import packages
1313
from ddtrace.internal._encoding import ListStringTable as _StringTable
1414
from ddtrace.internal.compat import ensure_str
15+
from ddtrace.internal.datadog.profiling.ddup import sanitize_string
1516
from ddtrace.internal.utils import config
1617
from ddtrace.profiling import event
1718
from ddtrace.profiling import exporter
@@ -190,9 +191,14 @@ class _PprofConverter(object):
190191

191192
def _to_Function(
192193
self,
193-
filename: str,
194-
funcname: str,
195-
) -> pprof_FunctionType:
194+
filename, # type: str
195+
funcname, # type: str
196+
):
197+
# type: (...) -> pprof_FunctionType
198+
# filename/funcname are "guaranteed" to be str, but on 3.11 and later
199+
# they may (erroneously?) be bytes. Try to fix this.
200+
filename = sanitize_string(filename)
201+
funcname = sanitize_string(funcname)
196202
try:
197203
return self._functions[(filename, funcname)]
198204
except KeyError:
@@ -206,10 +212,15 @@ class _PprofConverter(object):
206212

207213
def _to_Location(
208214
self,
209-
filename: str,
210-
lineno: int,
211-
funcname: str,
212-
) -> pprof_LocationType:
215+
filename, # type: str
216+
lineno, # type: int
217+
funcname, # type: str
218+
):
219+
# type: (...) -> pprof_LocationType
220+
# filename/funcname are "guaranteed" to be str, but on 3.11 and later
221+
# they may (erroneously?) be bytes. Try to fix this.
222+
filename = sanitize_string(filename)
223+
funcname = sanitize_string(funcname)
213224
try:
214225
return self._locations[(filename, lineno, funcname)]
215226
except KeyError:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
fixes:
3+
- |
4+
profiling: Fixed an issue with data encoding where non-string objects
5+
might be interned on Python 3.11

0 commit comments

Comments
 (0)