Skip to content

Commit e8681f7

Browse files
authored
feat(profiling): extract class name from stack traces (#4012)
This adds an heuristic for extracting class names from stack traces. This is far from being perfect, as it relies on the convention of using `self` and `cls` as name for the local variable in a function call to designate instances or classes. However, that ought to be enough for the vast majority of the Python program seen in the wild.
1 parent e3908f8 commit e8681f7

File tree

14 files changed

+585
-310
lines changed

14 files changed

+585
-310
lines changed

.github/workflows/test_frameworks.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -366,10 +366,11 @@ jobs:
366366
run: pip install ../ddtrace
367367
- name: Install dependencies
368368
run: |
369-
python -m pip install -U pip setuptools wheel
369+
python -m pip install -U pip setuptools wheel pytest
370370
python -m pip install -e .[test]
371371
- name: Run tests
372-
run: ddtrace-run python setup.py test
372+
# Disable tests checking GC references since profiling can interfere
373+
run: ddtrace-run python -m pytest -k 'not test_record_gc and not test_record_get and not test_record_items and not test_record_iter' tests
373374

374375
pylons-testsuite-1_0_3:
375376
name: Pylons 1.0.3

ddtrace/profiling/collector/_memalloc_tb.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ static traceback_t* traceback_buffer = NULL;
1313
/* A string containing "<unknown>" just in case we can't store the real function
1414
* or file name. */
1515
static PyObject* unknown_name = NULL;
16+
/* A string containing "" */
17+
static PyObject* empty_string = NULL;
1618

1719
#define TRACEBACK_SIZE(NFRAME) (sizeof(traceback_t) + sizeof(frame_t) * (NFRAME - 1))
1820

@@ -26,6 +28,13 @@ memalloc_tb_init(uint16_t max_nframe)
2628
PyUnicode_InternInPlace(&unknown_name);
2729
}
2830

31+
if (empty_string == NULL) {
32+
empty_string = PyUnicode_FromString("");
33+
if (empty_string == NULL)
34+
return -1;
35+
PyUnicode_InternInPlace(&empty_string);
36+
}
37+
2938
/* Allocate a buffer that can handle the largest traceback possible.
3039
This will be used a temporary buffer when converting stack traces. */
3140
traceback_buffer = PyMem_RawMalloc(TRACEBACK_SIZE(max_nframe));
@@ -171,7 +180,7 @@ traceback_to_tuple(traceback_t* tb)
171180
PyObject* stack = PyTuple_New(tb->nframe);
172181

173182
for (uint16_t nframe = 0; nframe < tb->nframe; nframe++) {
174-
PyObject* frame_tuple = PyTuple_New(3);
183+
PyObject* frame_tuple = PyTuple_New(4);
175184

176185
frame_t* frame = &tb->frames[nframe];
177186

@@ -180,6 +189,9 @@ traceback_to_tuple(traceback_t* tb)
180189
PyTuple_SET_ITEM(frame_tuple, 1, PyLong_FromUnsignedLong(frame->lineno));
181190
PyTuple_SET_ITEM(frame_tuple, 2, frame->name);
182191
Py_INCREF(frame->name);
192+
/* Class name */
193+
PyTuple_SET_ITEM(frame_tuple, 3, empty_string);
194+
Py_INCREF(empty_string);
183195

184196
PyTuple_SET_ITEM(stack, nframe, frame_tuple);
185197
}

ddtrace/profiling/collector/_traceback.pyx

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
cpdef _extract_class_name(frame):
2+
# type: (...) -> str
3+
"""Extract class name from a frame, if possible.
4+
5+
:param frame: The frame object.
6+
"""
7+
if frame.f_code.co_varnames:
8+
argname = frame.f_code.co_varnames[0]
9+
try:
10+
value = frame.f_locals[argname]
11+
except KeyError:
12+
return ""
13+
try:
14+
if argname == "self":
15+
return object.__getattribute__(type(value), "__name__") # use type() and object.__getattribute__ to avoid side-effects
16+
if argname == "cls":
17+
return object.__getattribute__(value, "__name__")
18+
except AttributeError:
19+
return ""
20+
return ""
21+
22+
123
cpdef traceback_to_frames(traceback, max_nframes):
224
"""Serialize a Python traceback object into a list of tuple of (filename, lineno, function_name).
325
@@ -13,7 +35,7 @@ cpdef traceback_to_frames(traceback, max_nframes):
1335
frame = tb.tb_frame
1436
code = frame.f_code
1537
lineno = 0 if frame.f_lineno is None else frame.f_lineno
16-
frames.insert(0, (code.co_filename, lineno, code.co_name))
38+
frames.insert(0, (code.co_filename, lineno, code.co_name, _extract_class_name(frame)))
1739
nframes += 1
1840
tb = tb.tb_next
1941
return frames, nframes
@@ -32,6 +54,6 @@ cpdef pyframe_to_frames(frame, max_nframes):
3254
if len(frames) < max_nframes:
3355
code = frame.f_code
3456
lineno = 0 if frame.f_lineno is None else frame.f_lineno
35-
frames.append((code.co_filename, lineno, code.co_name))
57+
frames.append((code.co_filename, lineno, code.co_name, _extract_class_name(frame)))
3658
frame = frame.f_back
3759
return frames, nframes

ddtrace/profiling/event.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
_T = typing.TypeVar("_T")
1010

11-
# (filename, line number, function name)
12-
FrameType = typing.Tuple[str, int, str]
11+
# (filename, line number, function name, class name)
12+
FrameType = typing.Tuple[str, int, str, str]
1313
StackTraceType = typing.List[FrameType]
1414

1515

ddtrace/profiling/exporter/pprof.pyx

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ class _PprofConverter(object):
195195
_functions = attr.ib(
196196
init=False, factory=dict, type=typing.Dict[typing.Tuple[str, typing.Optional[str]], pprof_FunctionType]
197197
)
198-
_locations = attr.ib(init=False, factory=dict, type=typing.Dict[event.FrameType, pprof_LocationType])
198+
_locations = attr.ib(init=False, factory=dict, type=typing.Dict[typing.Tuple[str, int, str], pprof_LocationType])
199199
_string_table = attr.ib(init=False, factory=_StringTable)
200200

201201
_last_location_id = attr.ib(init=False, factory=_Sequence)
@@ -257,7 +257,9 @@ class _PprofConverter(object):
257257
nframes, # type: int
258258
):
259259
# type: (...) -> typing.Tuple[int, ...]
260-
locations = [self._to_Location(filename, lineno, funcname).id for filename, lineno, funcname in frames]
260+
locations = [
261+
self._to_Location(filename, lineno, funcname).id for filename, lineno, funcname, class_name in frames
262+
]
261263

262264
omitted = nframes - len(frames)
263265
if omitted:
@@ -295,6 +297,7 @@ class _PprofConverter(object):
295297
("span id", span_id),
296298
("trace endpoint", trace_resource),
297299
("trace type", trace_type),
300+
("class name", frames[0][3]),
298301
),
299302
)
300303

@@ -369,6 +372,7 @@ class _PprofConverter(object):
369372
("trace endpoint", trace_resource),
370373
("trace type", trace_type),
371374
("lock name", lock_name),
375+
("class name", frames[0][3]),
372376
),
373377
)
374378

@@ -406,6 +410,7 @@ class _PprofConverter(object):
406410
("trace endpoint", trace_resource),
407411
("trace type", trace_type),
408412
("lock name", lock_name),
413+
("class name", frames[0][3]),
409414
),
410415
)
411416

@@ -439,6 +444,7 @@ class _PprofConverter(object):
439444
("trace endpoint", trace_resource),
440445
("trace type", trace_type),
441446
("exception type", exc_type_name),
447+
("class name", frames[0][3]),
442448
),
443449
)
444450

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
features:
3+
- |
4+
profiler: collect and export the class name for the wall time, CPU
5+
time and lock profiles, when available.

tests/profiling/collector/test_asyncio.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ async def test_lock_acquire_events():
2424
# It's called through pytest so I'm sure it's gonna be that long, right?
2525
assert len(event.frames) > 3
2626
assert event.nframes > 3
27-
assert event.frames[0] == (__file__, 16, "test_lock_acquire_events")
27+
assert event.frames[0] == (__file__, 16, "test_lock_acquire_events", "")
2828
assert event.sampling_pct == 100
2929

3030

@@ -45,7 +45,7 @@ async def test_asyncio_lock_release_events():
4545
# It's called through pytest so I'm sure it's gonna be that long, right?
4646
assert len(event.frames) > 3
4747
assert event.nframes > 3
48-
assert event.frames[0] == (__file__, 38, "test_asyncio_lock_release_events")
48+
assert event.frames[0] == (__file__, 38, "test_asyncio_lock_release_events", "")
4949
assert event.sampling_pct == 100
5050

5151

tests/profiling/collector/test_stack.py

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,71 @@ def test_collect_once():
8888
pytest.fail("Unable to find MainThread")
8989

9090

91+
def _find_sleep_event(events, class_name):
92+
class_method_found = False
93+
class_classmethod_found = False
94+
95+
for e in events:
96+
for frame in e.frames:
97+
if frame[0] == __file__.replace(".pyc", ".py") and frame[2] == "sleep_class" and frame[3] == class_name:
98+
class_method_found = True
99+
elif (
100+
frame[0] == __file__.replace(".pyc", ".py") and frame[2] == "sleep_instance" and frame[3] == class_name
101+
):
102+
class_classmethod_found = True
103+
104+
if class_method_found and class_classmethod_found:
105+
return True
106+
107+
return False
108+
109+
110+
def test_collect_once_with_class():
111+
class SomeClass(object):
112+
@classmethod
113+
def sleep_class(cls):
114+
# type: (...) -> bool
115+
return cls().sleep_instance()
116+
117+
def sleep_instance(self):
118+
# type: (...) -> bool
119+
for _ in range(5):
120+
if _find_sleep_event(r.events[stack_event.StackSampleEvent], "SomeClass"):
121+
return True
122+
nogevent.sleep(1)
123+
return False
124+
125+
r = recorder.Recorder()
126+
s = stack.StackCollector(r)
127+
128+
with s:
129+
assert SomeClass.sleep_class()
130+
131+
132+
def test_collect_once_with_class_not_right_type():
133+
# type: (...) -> None
134+
r = recorder.Recorder()
135+
136+
class SomeClass(object):
137+
@classmethod
138+
def sleep_class(foobar, cls):
139+
# type: (...) -> bool
140+
return foobar().sleep_instance(cls)
141+
142+
def sleep_instance(foobar, self):
143+
# type: (...) -> bool
144+
for _ in range(5):
145+
if _find_sleep_event(r.events[stack_event.StackSampleEvent], ""):
146+
return True
147+
nogevent.sleep(1)
148+
return False
149+
150+
s = stack.StackCollector(r)
151+
152+
with s:
153+
assert SomeClass.sleep_class(123)
154+
155+
91156
def _fib(n):
92157
if n == 1:
93158
return 1
@@ -309,7 +374,7 @@ def test_exception_collection_threads():
309374
assert e.sampling_period > 0
310375
assert e.thread_id in {t.ident for t in threads}
311376
assert isinstance(e.thread_name, str)
312-
assert e.frames == [("<string>", 5, "_f30")]
377+
assert e.frames == [("<string>", 5, "_f30", "")]
313378
assert e.nframes == 1
314379
assert e.exc_type == ValueError
315380
for t in threads:
@@ -333,7 +398,7 @@ def test_exception_collection():
333398
assert e.sampling_period > 0
334399
assert e.thread_id == nogevent.thread_get_ident()
335400
assert e.thread_name == "MainThread"
336-
assert e.frames == [(__file__, 327, "test_exception_collection")]
401+
assert e.frames == [(__file__, 392, "test_exception_collection", "")]
337402
assert e.nframes == 1
338403
assert e.exc_type == ValueError
339404

@@ -365,7 +430,7 @@ def test_exception_collection_trace(
365430
assert e.sampling_period > 0
366431
assert e.thread_id == nogevent.thread_get_ident()
367432
assert e.thread_name == "MainThread"
368-
assert e.frames == [(__file__, 354, "test_exception_collection_trace")]
433+
assert e.frames == [(__file__, 419, "test_exception_collection_trace", "")]
369434
assert e.nframes == 1
370435
assert e.exc_type == ValueError
371436
assert e.span_id == span.span_id
@@ -647,7 +712,7 @@ def _nothing():
647712
else:
648713
main_thread_found = True
649714
elif event.task_id in {t.ident for t in threads}:
650-
for filename, lineno, funcname in event.frames:
715+
for filename, lineno, funcname, classname in event.frames:
651716
if funcname in (
652717
"_nothing",
653718
"sleep",

tests/profiling/collector/test_stack_asyncio.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,15 @@ async def hello() -> None:
6060
if _asyncio_compat.PY37_AND_LATER:
6161
if event.task_name == "main":
6262
assert event.thread_name == "MainThread"
63-
assert event.frames == [(__file__, 30, "hello")]
63+
assert event.frames == [(__file__, 30, "hello", "")]
6464
assert event.nframes == 1
6565
elif event.task_name == t1_name:
6666
assert event.thread_name == "MainThread"
67-
assert event.frames == [(__file__, 24, "stuff")]
67+
assert event.frames == [(__file__, 24, "stuff", "")]
6868
assert event.nframes == 1
6969
elif event.task_name == t2_name:
7070
assert event.thread_name == "MainThread"
71-
assert event.frames == [(__file__, 24, "stuff")]
71+
assert event.frames == [(__file__, 24, "stuff", "")]
7272
assert event.nframes == 1
7373

7474
if event.thread_name == "MainThread" and (

0 commit comments

Comments
 (0)