Skip to content

Commit 9b95258

Browse files
fix(profiling): default to libdatadog if autoinject/SSI is enabled [backport 2.7] (#8834)
Backport 1c1f54a from #8822 to 2.7. In injected configurations, the Profiler can cause deployed (bundled? stashed?) dependencies to over-ride the dependencies of the underlying application. This is especially problematic for the `protobuf` library. This PR changes several things * Introduces a new "required" option to the libdatadog (libdd) collector/exporter. Normally, if the user merely enables libdatadog, the profiler will make a best effort at using it, but fallback to the old collector/exporter if needed. When libdatadog is required, profiling will be disabled if libdatadog cannot be used. * Has lib-inection set libdatadog to "required" ## Checklist - [X] Change(s) are motivated and described in the PR description - [X] Testing strategy is described if automated tests are not included in the PR - [X] Risks are described (performance impact, potential for breakage, maintainability) - [X] Change is maintainable (easy to change, telemetry, documentation) - [X] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [X] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [X] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [X] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. ## Reviewer Checklist - [x] Title is accurate - [x] All changes are related to the pull request's stated goal - [x] Description motivates each change - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [x] Testing strategy adequately addresses listed risks - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] Release note makes sense to a user of the library - [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) Co-authored-by: David Sanchez <[email protected]>
1 parent 9a811d9 commit 9b95258

File tree

8 files changed

+147
-66
lines changed

8 files changed

+147
-66
lines changed

ddtrace/profiling/collector/memalloc.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,6 @@ def _start_service(self):
7171
if _memalloc is None:
7272
raise collector.CollectorUnavailable
7373

74-
if self._export_libdd_enabled and not ddup.is_available:
75-
self._export_libdd_enabled = False
76-
7774
try:
7875
_memalloc.start(self.max_nframe, self._max_events, self.heap_sample_size)
7976
except RuntimeError:

ddtrace/profiling/collector/stack.pyx

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -487,24 +487,7 @@ class StackCollector(collector.PeriodicCollector):
487487

488488
# If libdd is enabled, propagate the configuration
489489
if config.export.libdd_enabled:
490-
if not ddup.is_available:
491-
# We don't report on this, since it's already been reported in profiler.py
492-
set_use_libdd(False)
493-
494-
# If the user had also set stack.v2.enabled, then we need to disable that as well.
495-
if self._stack_collector_v2_enabled:
496-
self._stack_collector_v2_enabled = False
497-
LOG.error("Stack v2 was requested, but the libdd collector could not be enabled. Falling back to the v1 stack sampler.")
498-
else:
499-
set_use_libdd(True)
500-
501-
# If stack v2 is requested, verify it is loaded properly and that libdd has been enabled.
502-
if self._stack_collector_v2_enabled:
503-
if not stack_v2.is_available:
504-
self._stack_collector_v2_enabled = False
505-
LOG.error("Stack v2 was requested, but it could not be enabled. Check debug logs for more information.")
506-
if not use_libdd:
507-
self._stack_collector_v2_enabled = False
490+
set_use_libdd(True)
508491

509492
# If at the end of things, stack v2 is still enabled, then start the native thread running the v2 sampler
510493
if self._stack_collector_v2_enabled:

ddtrace/profiling/profiler.py

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -128,20 +128,23 @@ class _ProfilerInstance(service.Service):
128128
init=False, factory=lambda: os.environ.get("AWS_LAMBDA_FUNCTION_NAME"), type=Optional[str]
129129
)
130130
_export_libdd_enabled = attr.ib(type=bool, default=config.export.libdd_enabled)
131+
_export_libdd_required = attr.ib(type=bool, default=config.export.libdd_required)
131132

132133
ENDPOINT_TEMPLATE = "https://intake.profile.{}"
133134

134135
def _build_default_exporters(self):
135136
# type: (...) -> List[exporter.Exporter]
136-
_OUTPUT_PPROF = config.output_pprof
137-
if _OUTPUT_PPROF:
138-
# DEV: Import this only if needed to avoid importing protobuf
139-
# unnecessarily
140-
from ddtrace.profiling.exporter import file
141-
142-
return [
143-
file.PprofFileExporter(prefix=_OUTPUT_PPROF),
144-
]
137+
if not self._export_libdd_enabled:
138+
# If libdatadog support is enabled, we can skip this part
139+
_OUTPUT_PPROF = config.output_pprof
140+
if _OUTPUT_PPROF:
141+
# DEV: Import this only if needed to avoid importing protobuf
142+
# unnecessarily
143+
from ddtrace.profiling.exporter import file
144+
145+
return [
146+
file.PprofFileExporter(prefix=_OUTPUT_PPROF),
147+
]
145148

146149
if self.url is not None:
147150
endpoint = self.url
@@ -168,13 +171,11 @@ def _build_default_exporters(self):
168171
if self._lambda_function_name is not None:
169172
self.tags.update({"functionname": self._lambda_function_name})
170173

171-
# It's possible to fail to load the libdatadog collector, so we check. Any other consumers
172-
# of libdd can do their own check, so we just log.
173-
if self._export_libdd_enabled and not ddup.is_available:
174-
LOG.error("Failed to load the libdd collector, falling back to the legacy collector")
175-
self._export_libdd_enabled = False
176-
elif self._export_libdd_enabled:
177-
LOG.debug("Using the libdd collector")
174+
# Did the user request the libdd collector? Better log it.
175+
if self._export_libdd_enabled:
176+
LOG.debug("The libdd collector is enabled")
177+
if self._export_libdd_required:
178+
LOG.debug("The libdd collector is required")
178179

179180
# Build the list of enabled Profiling features and send along as a tag
180181
configured_features = []
@@ -194,6 +195,8 @@ def _build_default_exporters(self):
194195
configured_features.append("exp_dd")
195196
else:
196197
configured_features.append("exp_py")
198+
if self._export_libdd_required:
199+
configured_features.append("req_dd")
197200
configured_features.append("CAP" + str(config.capture_pct))
198201
configured_features.append("MAXF" + str(config.max_frames))
199202
self.tags.update({"profiler_config": "_".join(configured_features)})
@@ -202,34 +205,53 @@ def _build_default_exporters(self):
202205
if self.endpoint_collection_enabled:
203206
endpoint_call_counter_span_processor.enable()
204207

208+
# If libdd is enabled, then
209+
# * If initialization fails, disable the libdd collector and fall back to the legacy exporter
210+
# * If initialization fails and libdd is required, disable everything and return (error)
205211
if self._export_libdd_enabled:
206-
ddup.init(
207-
env=self.env,
208-
service=self.service,
209-
version=self.version,
210-
tags=self.tags, # type: ignore
211-
max_nframes=config.max_frames,
212-
url=endpoint,
213-
)
214-
else:
215-
# DEV: Import this only if needed to avoid importing protobuf
216-
# unnecessarily
217-
from ddtrace.profiling.exporter import http
218-
219-
return [
220-
http.PprofHTTPExporter(
221-
service=self.service,
212+
try:
213+
ddup.init(
222214
env=self.env,
223-
tags=self.tags,
215+
service=self.service,
224216
version=self.version,
225-
api_key=self.api_key,
226-
endpoint=endpoint,
227-
endpoint_path=endpoint_path,
228-
enable_code_provenance=self.enable_code_provenance,
229-
endpoint_call_counter_span_processor=endpoint_call_counter_span_processor,
217+
tags=self.tags, # type: ignore
218+
max_nframes=config.max_frames,
219+
url=endpoint,
230220
)
231-
]
232-
return []
221+
return []
222+
except Exception as e:
223+
LOG.error("Failed to initialize libdd collector (%s), falling back to the legacy collector", e)
224+
self._export_libdd_enabled = False
225+
config.export.libdd_enabled = False
226+
227+
# If we're here and libdd was required, then there's nothing else to do. We don't have a
228+
# collector.
229+
if self._export_libdd_required:
230+
LOG.error("libdd collector is required but could not be initialized. Disabling profiling.")
231+
config.enabled = False
232+
config.export.libdd_required = False
233+
config.lock.enabled = False
234+
config.memory.enabled = False
235+
config.stack.enabled = False
236+
return []
237+
238+
# DEV: Import this only if needed to avoid importing protobuf
239+
# unnecessarily
240+
from ddtrace.profiling.exporter import http
241+
242+
return [
243+
http.PprofHTTPExporter(
244+
service=self.service,
245+
env=self.env,
246+
tags=self.tags,
247+
version=self.version,
248+
api_key=self.api_key,
249+
endpoint=endpoint,
250+
endpoint_path=endpoint_path,
251+
enable_code_provenance=self.enable_code_provenance,
252+
endpoint_call_counter_span_processor=endpoint_call_counter_span_processor,
253+
)
254+
]
233255

234256
def __attrs_post_init__(self):
235257
# type: (...) -> None

ddtrace/profiling/scheduler.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ def __attrs_post_init__(self):
3333
def _start_service(self):
3434
# type: (...) -> None
3535
"""Start the scheduler."""
36-
if self._export_libdd_enabled and not ddup.is_available:
37-
self._export_libdd_enabled = False
3836
LOG.debug("Starting scheduler")
3937
super(Scheduler, self)._start_service()
4038
self._last_export = compat.time_ns()

ddtrace/settings/profiling.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,37 @@ def _derive_default_heap_sample_size(heap_config, default_heap_sample_size=1024
3737
return int(max(math.ceil(total_mem / max_samples), default_heap_sample_size))
3838

3939

40+
def _check_for_ddup_available():
41+
ddup_is_available = False
42+
try:
43+
from ddtrace.internal.datadog.profiling import ddup
44+
45+
ddup_is_available = ddup.is_available
46+
except Exception:
47+
pass # nosec
48+
return ddup_is_available
49+
50+
51+
def _check_for_stack_v2_available():
52+
stack_v2_is_available = False
53+
if not _check_for_ddup_available():
54+
return False
55+
56+
try:
57+
from ddtrace.internal.datadog.profiling import stack_v2
58+
59+
stack_v2_is_available = stack_v2.is_available
60+
except Exception:
61+
pass # nosec
62+
return stack_v2_is_available
63+
64+
65+
# We don't check for the availability of the ddup module when determining whether libdd is _required_,
66+
# since it's up to the application code to determine what happens in that failure case.
67+
def _is_libdd_required(config):
68+
return config.stack.v2.enabled or config._libdd_required
69+
70+
4071
class ProfilingConfig(En):
4172
__prefix__ = "dd.profiling"
4273

@@ -162,14 +193,16 @@ class Stack(En):
162193
class V2(En):
163194
__item__ = __prefix__ = "v2"
164195

165-
enabled = En.v(
196+
_enabled = En.v(
166197
bool,
167198
"enabled",
168199
default=False,
169200
help_type="Boolean",
170201
help="Whether to enable the v2 stack profiler. Also enables the libdatadog collector.",
171202
)
172203

204+
enabled = En.d(bool, lambda c: _check_for_stack_v2_available() and c._enabled)
205+
173206
class Lock(En):
174207
__item__ = __prefix__ = "lock"
175208

@@ -223,13 +256,36 @@ class Heap(En):
223256
class Export(En):
224257
__item__ = __prefix__ = "export"
225258

226-
libdd_enabled = En.v(
259+
_libdd_required = En.v(
260+
bool,
261+
"libdd_required",
262+
default=False,
263+
help_type="Boolean",
264+
help="Requires the native exporter to be enabled",
265+
)
266+
267+
libdd_required = En.d(
268+
bool,
269+
_is_libdd_required,
270+
)
271+
272+
_libdd_enabled = En.v(
227273
bool,
228274
"libdd_enabled",
229275
default=False,
230276
help_type="Boolean",
231-
help="Enables collection and export using the experimental exporter",
277+
help="Enables collection and export using a native exporter. Can fallback to the pure-Python exporter.",
278+
)
279+
280+
libdd_enabled = En.d(
281+
bool, lambda c: (_is_libdd_required(c) or c._libdd_enabled) and _check_for_ddup_available()
232282
)
233283

284+
Export.include(Stack, namespace="stack")
285+
234286

235287
config = ProfilingConfig()
288+
289+
if config.export.libdd_required and not config.export.libdd_enabled:
290+
logger.warning("The native exporter is required, but not enabled. Disabling profiling.")
291+
config.enabled = False

lib-injection/dl_wheels.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import argparse
2020
import itertools
2121
import os
22+
from pathlib import Path
23+
import shutil
2224
import subprocess
2325
import sys
2426

@@ -122,3 +124,16 @@
122124
)
123125
# Remove the wheel as it has been unpacked
124126
os.remove(wheel_file)
127+
128+
sitepackages_root = Path(dl_dir) / f"site-packages-ddtrace-py{python_version}-{platform}"
129+
directories_to_remove = [
130+
sitepackages_root / "google" / "protobuf",
131+
sitepackages_root / "google" / "_upb",
132+
]
133+
directories_to_remove.extend(sitepackages_root.glob("protobuf-*")) # dist-info directories
134+
135+
for directory in directories_to_remove:
136+
try:
137+
shutil.rmtree(directory)
138+
except Exception:
139+
pass

lib-injection/sitecustomize.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ def _inject():
7777
_log("failed to load ddtrace module: %s" % e, level="error")
7878
return
7979
else:
80+
# In injected environments, the profiler needs to know that it is only allowed to use the native exporter
81+
os.environ["DD_PROFILING_EXPORT_LIBDD_REQUIRED"] = "true"
82+
8083
# This import has the same effect as ddtrace-run for the current process (auto-instrument all libraries).
8184
import ddtrace.bootstrap.sitecustomize
8285

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
fixes:
3+
- |
4+
Profiling: This fix resolves an issue where the profiler was forcing protobuf to load in injected environments,
5+
causing crashes in configurations which relied on older protobuf versions. The profiler will now detect
6+
when injection is used and try loading with the native exporter. If that fails, it will self-disable
7+
rather than loading protobuf.

0 commit comments

Comments
 (0)