Skip to content

Commit 543a6d9

Browse files
fix(opentelemetry): record errors in span events [backport 2.9] (#9448)
Backport 7577ee8 from #9379 to 2.9. #9380 introduced support for generating and serializing span events for the OpenTelemetry API. This PR uses span events to implement the record-exception API: https://opentelemetry.io/docs/specs/otel/trace/exceptions/#recording-an-exception. With this enhancement span events will be fully supported. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: Munir Abdinur <[email protected]>
1 parent 01b9b10 commit 543a6d9

File tree

6 files changed

+124
-65
lines changed

6 files changed

+124
-65
lines changed

ddtrace/_trace/span.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -549,16 +549,10 @@ def set_exc_info(self, exc_type, exc_val, exc_tb):
549549
return
550550

551551
self.error = 1
552-
self._set_exc_tags(exc_type, exc_val, exc_tb)
553-
554-
def _set_exc_tags(self, exc_type, exc_val, exc_tb):
555-
limit = config._span_traceback_max_size
556-
if limit is None:
557-
limit = 30
558552

559553
# get the traceback
560554
buff = StringIO()
561-
traceback.print_exception(exc_type, exc_val, exc_tb, file=buff, limit=limit)
555+
traceback.print_exception(exc_type, exc_val, exc_tb, file=buff, limit=config._span_traceback_max_size)
562556
tb = buff.getvalue()
563557

564558
# readable version of type (e.g. exceptions.ZeroDivisionError)

ddtrace/opentelemetry/_span.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import traceback
12
from typing import TYPE_CHECKING
23

34
from opentelemetry.trace import Span as OtelSpan
@@ -8,7 +9,10 @@
89
from opentelemetry.trace.span import TraceFlags
910
from opentelemetry.trace.span import TraceState
1011

12+
from ddtrace import config
1113
from ddtrace.constants import ERROR_MSG
14+
from ddtrace.constants import ERROR_STACK
15+
from ddtrace.constants import ERROR_TYPE
1216
from ddtrace.constants import SPAN_KIND
1317
from ddtrace.internal.compat import time_ns
1418
from ddtrace.internal.logger import get_logger
@@ -216,14 +220,34 @@ def set_status(self, status, description=None):
216220
def record_exception(self, exception, attributes=None, timestamp=None, escaped=False):
217221
# type: (BaseException, Optional[Attributes], Optional[int], bool) -> None
218222
"""
219-
Records the type, message, and traceback of an exception as Span attributes.
220-
Note - Span Events are not currently used to record exception info.
223+
Records an exception as an event
221224
"""
222225
if not self.is_recording():
223226
return
224-
self._ddspan._set_exc_tags(type(exception), exception, exception.__traceback__)
227+
# Set exception attributes in a manner that is consistent with the opentelemetry sdk
228+
# https://github.com/open-telemetry/opentelemetry-python/blob/v1.24.0/opentelemetry-sdk/src/opentelemetry/sdk/trace/__init__.py#L998
229+
# We will not set the exception.stacktrace attribute, this will reduce the size of the span event
230+
attrs = {
231+
"exception.type": "%s.%s" % (exception.__class__.__module__, exception.__class__.__name__),
232+
"exception.message": str(exception),
233+
"exception.escaped": str(escaped),
234+
}
225235
if attributes:
226-
self.set_attributes(attributes)
236+
# User-provided attributes must take precedence over attrs
237+
attrs.update(attributes)
238+
239+
# Set the error type, error message and error stacktrace tags on the span
240+
self._ddspan._meta[ERROR_MSG] = attrs["exception.message"]
241+
self._ddspan._meta[ERROR_TYPE] = attrs["exception.type"]
242+
if "exception.stacktrace" in attrs:
243+
self._ddspan._meta[ERROR_STACK] = attrs["exception.stacktrace"]
244+
else:
245+
self._ddspan._meta[ERROR_STACK] = "".join(
246+
traceback.format_exception(
247+
type(exception), exception, exception.__traceback__, limit=config._span_traceback_max_size
248+
)
249+
)
250+
self.add_event(name="exception", attributes=attrs, timestamp=timestamp)
227251

228252
def __enter__(self):
229253
# type: () -> Span
@@ -236,9 +260,10 @@ def __exit__(self, exc_type, exc_val, exc_tb):
236260
"""Ends Span context manager"""
237261
if exc_val:
238262
if self._record_exception:
239-
self.record_exception(exc_val)
263+
# Generates a span event for the exception
264+
self.record_exception(exc_val, escaped=True)
240265
if self._set_status_on_exception:
241-
# do not overwrite the status message set by record exception
266+
# Set the status to Error; this will NOT set the `error.message` tag on the span
242267
self.set_status(StatusCode.ERROR)
243268
self.end()
244269

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
opentelemetry: Records exceptions on spans in a manner that is consistent with the `otel specification <https://opentelemetry.io/docs/specs/otel/trace/exceptions/#recording-an-exception>`_

tests/opentelemetry/test_trace.py

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import mock
12
import opentelemetry
23
import opentelemetry.version
34
import pytest
@@ -17,24 +18,31 @@ def test_otel_compatible_tracer_is_returned_by_tracer_provider():
1718
assert isinstance(otel_compatible_tracer, opentelemetry.trace.Tracer)
1819

1920

20-
@pytest.mark.snapshot(wait_for_num_traces=1)
21-
def test_otel_start_span_with_default_args(oteltracer):
22-
otel_span = oteltracer.start_span("test-start-span")
21+
@pytest.mark.snapshot(wait_for_num_traces=1, ignores=["meta.error.stack"])
22+
def test_otel_start_span_record_exception(oteltracer):
23+
# Avoid mocking time_ns when Span is created. This is a workaround to resolve a rate limit bug.
24+
raised_span = oteltracer.start_span("test-raised-exception")
2325
with pytest.raises(Exception, match="Sorry Otel Span, I failed you"):
24-
with opentelemetry.trace.use_span(
25-
otel_span,
26-
end_on_exit=False,
27-
record_exception=False,
28-
set_status_on_exception=False,
29-
):
30-
otel_span.update_name("rename-start-span")
31-
raise Exception("Sorry Otel Span, I failed you")
32-
33-
# set_status_on_exception is False
34-
assert otel_span._ddspan.error == 0
35-
# Since end_on_exit=False start_as_current_span should not call Span.end()
36-
assert otel_span.is_recording()
37-
otel_span.end()
26+
# Ensures that the exception is recorded with a consistent timestamp for snapshot testing
27+
with mock.patch("ddtrace._trace.span.time_ns", return_value=1716560261227739000):
28+
with raised_span:
29+
raised_span.record_exception(ValueError("Invalid Operation 1"))
30+
raise Exception("Sorry Otel Span, I failed you")
31+
32+
with oteltracer.start_span("test-recorded-exception") as not_raised_span:
33+
not_raised_span.record_exception(
34+
IndexError("Invalid Operation 2"), {"exception.stuff": "thing 2"}, 1716560281337739
35+
)
36+
not_raised_span.record_exception(
37+
Exception("Real Exception"),
38+
{
39+
"exception.type": "RandoException",
40+
"exception.message": "MoonEar Fire!!!",
41+
"exception.stacktrace": "Fake traceback",
42+
"exception.details": "This is FAKE, I overwrote the real exception details",
43+
},
44+
1716560271237812,
45+
)
3846

3947

4048
@pytest.mark.snapshot(wait_for_num_traces=1)
@@ -47,22 +55,17 @@ def test_otel_start_span_without_default_args(oteltracer):
4755
attributes={"start_span_tag": "start_span_val"},
4856
links=None,
4957
start_time=0,
50-
record_exception=True,
51-
set_status_on_exception=True,
58+
record_exception=False,
59+
set_status_on_exception=False,
5260
)
53-
otel_span.update_name("rename-start-span")
5461
with pytest.raises(Exception, match="Sorry Otel Span, I failed you"):
55-
with opentelemetry.trace.use_span(
56-
otel_span,
57-
end_on_exit=False,
58-
record_exception=False,
59-
set_status_on_exception=False,
60-
):
62+
with otel_span:
63+
otel_span.update_name("rename-start-span")
6164
raise Exception("Sorry Otel Span, I failed you")
6265

6366
# set_status_on_exception is False
6467
assert otel_span._ddspan.error == 0
65-
assert otel_span.is_recording()
68+
assert otel_span.is_recording() is False
6669
assert root.is_recording()
6770
otel_span.end()
6871
root.end()
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
[[
2+
{
3+
"name": "internal",
4+
"service": "",
5+
"resource": "test-raised-exception",
6+
"trace_id": 0,
7+
"span_id": 1,
8+
"parent_id": 0,
9+
"type": "",
10+
"error": 1,
11+
"meta": {
12+
"_dd.p.dm": "-0",
13+
"_dd.p.tid": "6658897400000000",
14+
"error.message": "Sorry Otel Span, I failed you",
15+
"error.stack": "Traceback (most recent call last):\n File \"/Users/munirabdinur/go/src/github.com/DataDog/dd-trace-py/tests/opentelemetry/test_trace.py\", line 27, in test_otel_start_span_record_exception\n raise Exception(\"Sorry Otel Span, I failed you\")\nException: Sorry Otel Span, I failed you\n",
16+
"error.type": "builtins.Exception",
17+
"events": "[{\"name\": \"exception\", \"time_unix_nano\": 1716560261227739000, \"attributes\": {\"exception.type\": \"builtins.ValueError\", \"exception.message\": \"Invalid Operation 1\", \"exception.escaped\": \"False\"}}, {\"name\": \"exception\", \"time_unix_nano\": 1716560261227739000, \"attributes\": {\"exception.type\": \"builtins.Exception\", \"exception.message\": \"Sorry Otel Span, I failed you\", \"exception.escaped\": \"True\"}}]",
18+
"language": "python",
19+
"runtime-id": "d0950ce7bda6498183acde9036abb131"
20+
},
21+
"metrics": {
22+
"_dd.top_level": 1,
23+
"_dd.tracer_kr": 1.0,
24+
"_sampling_priority_v1": 1,
25+
"process_id": 71659
26+
},
27+
"duration": 518127750028000,
28+
"start": 1716560261227739000
29+
}],
30+
[
31+
{
32+
"name": "internal",
33+
"service": "",
34+
"resource": "test-recorded-exception",
35+
"trace_id": 1,
36+
"span_id": 1,
37+
"parent_id": 0,
38+
"type": "",
39+
"error": 0,
40+
"meta": {
41+
"_dd.p.dm": "-0",
42+
"_dd.p.tid": "6658897400000000",
43+
"error.message": "MoonEar Fire!!!",
44+
"error.stack": "Fake traceback",
45+
"error.type": "RandoException",
46+
"events": "[{\"name\": \"exception\", \"time_unix_nano\": 1716560281337739000, \"attributes\": {\"exception.type\": \"builtins.IndexError\", \"exception.message\": \"Invalid Operation 2\", \"exception.escaped\": \"False\", \"exception.stuff\": \"thing 2\"}}, {\"name\": \"exception\", \"time_unix_nano\": 1716560271237812000, \"attributes\": {\"exception.type\": \"RandoException\", \"exception.message\": \"MoonEar Fire!!!\", \"exception.escaped\": \"False\", \"exception.stacktrace\": \"Fake traceback\", \"exception.details\": \"This is FAKE, I overwrote the real exception details\"}}]",
47+
"language": "python",
48+
"runtime-id": "d0950ce7bda6498183acde9036abb131"
49+
},
50+
"metrics": {
51+
"_dd.top_level": 1,
52+
"_dd.tracer_kr": 1.0,
53+
"_sampling_priority_v1": 1,
54+
"process_id": 71659
55+
},
56+
"duration": 518127750240000,
57+
"start": 1716560281337739000
58+
}]]

tests/snapshots/tests.opentelemetry.test_trace.test_otel_start_span_with_default_args.json

Lines changed: 0 additions & 25 deletions
This file was deleted.

0 commit comments

Comments
 (0)