Skip to content

Commit 08dbcaf

Browse files
hmstepaneklrafeei
andauthored
Add adaptive_sampling_target setting & fix logic (#1549)
* Fix removal of process attr call * Add adaptive_sampling_target setting & fix logic * Add a new setting: `distributed_tracing.sampler.adaptive_sampling_target` that can be used to configure the sampling target. * Previously the logic for remote parent sampled was slightly incorrect. The sampling value from the tracestate header was used to determine remote parent sampled, instead of from the parent header-this has been fixed. Additionally, the sampling value was only grabbed from the trace state and newrelic headers if there was a priority-this has been fixed too. * Log statements have been added to assist with sampling debug. --------- Co-authored-by: Lalleh Rafeei <[email protected]>
1 parent 008a7e8 commit 08dbcaf

File tree

6 files changed

+147
-56
lines changed

6 files changed

+147
-56
lines changed

newrelic/api/transaction.py

Lines changed: 87 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,8 @@ def __init__(self, application, enabled=None, source=None):
285285
self.tracestate = ""
286286
self._priority = None
287287
self._sampled = None
288-
self._traceparent_sampled = None
288+
# Remote parent sampled is set from the W3C parent header or the Newrelic header if no W3C parent header is present.
289+
self._remote_parent_sampled = None
289290

290291
self._distributed_trace_state = 0
291292

@@ -569,7 +570,7 @@ def __exit__(self, exc, value, tb):
569570
if self._settings.distributed_tracing.enabled:
570571
# Sampled and priority need to be computed at the end of the
571572
# transaction when distributed tracing or span events are enabled.
572-
self._compute_sampled_and_priority()
573+
self._make_sampling_decision()
573574

574575
self._cached_path._name = self.path
575576
agent_attributes = self.agent_attributes
@@ -1004,35 +1005,91 @@ def _update_agent_attributes(self):
10041005
def user_attributes(self):
10051006
return create_attributes(self._custom_params, DST_ALL, self.attribute_filter)
10061007

1007-
def sampling_algo_compute_sampled_and_priority(self):
1008-
if self._priority is None:
1008+
def sampling_algo_compute_sampled_and_priority(self, priority, sampled):
1009+
# self._priority and self._sampled are set when parsing the W3C tracestate
1010+
# or newrelic DT headers and may be overridden in _make_sampling_decision
1011+
# based on the configuration. The only time they are set in here is when the
1012+
# sampling decision must be made by the adaptive sampling algorithm.
1013+
if priority is None:
10091014
# Truncate priority field to 6 digits past the decimal.
1010-
self._priority = float(f"{random.random():.6f}") # noqa: S311
1011-
if self._sampled is None:
1012-
self._sampled = self._application.compute_sampled()
1013-
if self._sampled:
1014-
self._priority += 1
1015-
1016-
def _compute_sampled_and_priority(self):
1017-
if self._traceparent_sampled is None:
1015+
priority = float(f"{random.random():.6f}") # noqa: S311
1016+
if sampled is None:
1017+
_logger.debug("No trusted account id found. Sampling decision will be made by adaptive sampling algorithm.")
1018+
sampled = self._application.compute_sampled()
1019+
if sampled:
1020+
priority += 1
1021+
return priority, sampled
1022+
1023+
def _compute_sampled_and_priority(
1024+
self,
1025+
priority,
1026+
sampled,
1027+
remote_parent_sampled_path,
1028+
remote_parent_sampled_setting,
1029+
remote_parent_not_sampled_path,
1030+
remote_parent_not_sampled_setting,
1031+
):
1032+
if self._remote_parent_sampled is None:
10181033
config = "default" # Use sampling algo.
1019-
elif self._traceparent_sampled:
1020-
setting_path = "distributed_tracing.sampler.remote_parent_sampled"
1021-
config = self.settings.distributed_tracing.sampler.remote_parent_sampled
1022-
else: # self._traceparent_sampled is False.
1023-
setting_path = "distributed_tracing.sampler.remote_parent_not_sampled"
1024-
config = self.settings.distributed_tracing.sampler.remote_parent_not_sampled
1025-
1034+
_logger.debug("Sampling decision made based on no remote parent sampling decision present.")
1035+
elif self._remote_parent_sampled:
1036+
setting_path = remote_parent_sampled_path
1037+
config = remote_parent_sampled_setting
1038+
_logger.debug(
1039+
"Sampling decision made based on remote_parent_sampled=%s and %s=%s.",
1040+
self._remote_parent_sampled,
1041+
setting_path,
1042+
config,
1043+
)
1044+
else: # self._remote_parent_sampled is False.
1045+
setting_path = remote_parent_not_sampled_path
1046+
config = remote_parent_not_sampled_setting
1047+
_logger.debug(
1048+
"Sampling decision made based on remote_parent_sampled=%s and %s=%s.",
1049+
self._remote_parent_sampled,
1050+
setting_path,
1051+
config,
1052+
)
10261053
if config == "always_on":
1027-
self._sampled = True
1028-
self._priority = 2.0
1054+
sampled = True
1055+
priority = 2.0
10291056
elif config == "always_off":
1030-
self._sampled = False
1031-
self._priority = 0
1057+
sampled = False
1058+
priority = 0
10321059
else:
1033-
if config != "default":
1060+
if config not in ("default", "adaptive"):
10341061
_logger.warning("%s=%s is not a recognized value. Using 'default' instead.", setting_path, config)
1035-
self.sampling_algo_compute_sampled_and_priority()
1062+
1063+
_logger.debug(
1064+
"Let adaptive sampler algorithm decide based on sampled=%s and priority=%s.", sampled, priority
1065+
)
1066+
priority, sampled = self.sampling_algo_compute_sampled_and_priority(priority, sampled)
1067+
return priority, sampled
1068+
1069+
def _make_sampling_decision(self):
1070+
# The sampling decision is computed each time a DT header is generated for exit spans as it is needed
1071+
# to send the DT headers. Don't recompute the sampling decision multiple times as it is expensive.
1072+
if hasattr(self, "_sampling_decision_made"):
1073+
return
1074+
priority = self._priority
1075+
sampled = self._sampled
1076+
_logger.debug(
1077+
"Full granularity tracing is enabled. Asking if full granularity wants to sample. priority=%s, sampled=%s",
1078+
priority,
1079+
sampled,
1080+
)
1081+
computed_priority, computed_sampled = self._compute_sampled_and_priority(
1082+
priority,
1083+
sampled,
1084+
remote_parent_sampled_path="distributed_tracing.sampler.remote_parent_sampled",
1085+
remote_parent_sampled_setting=self.settings.distributed_tracing.sampler.remote_parent_sampled,
1086+
remote_parent_not_sampled_path="distributed_tracing.sampler.remote_parent_not_sampled",
1087+
remote_parent_not_sampled_setting=self.settings.distributed_tracing.sampler.remote_parent_not_sampled,
1088+
)
1089+
_logger.debug("Full granularity sampling decision was %s with priority=%s.", sampled, priority)
1090+
self._priority = computed_priority
1091+
self._sampled = computed_sampled
1092+
self._sampling_decision_made = True
10361093

10371094
def _freeze_path(self):
10381095
if self._frozen_path is None:
@@ -1101,7 +1158,7 @@ def _create_distributed_trace_data(self):
11011158
if not (account_id and application_id and trusted_account_key and settings.distributed_tracing.enabled):
11021159
return
11031160

1104-
self._compute_sampled_and_priority()
1161+
self._make_sampling_decision()
11051162
data = {
11061163
"ty": "App",
11071164
"ac": account_id,
@@ -1204,7 +1261,7 @@ def _accept_distributed_trace_payload(self, payload, transport_type="HTTP"):
12041261
if not any(k in data for k in ("id", "tx")):
12051262
self._record_supportability("Supportability/DistributedTrace/AcceptPayload/ParseException")
12061263
return False
1207-
1264+
self._remote_parent_sampled = data.get("sa")
12081265
settings = self._settings
12091266
account_id = data.get("ac")
12101267
trusted_account_key = settings.trusted_account_key or (
@@ -1254,10 +1311,8 @@ def _accept_distributed_trace_data(self, data, transport_type):
12541311

12551312
self._trace_id = data.get("tr")
12561313

1257-
priority = data.get("pr")
1258-
if priority is not None:
1259-
self._priority = priority
1260-
self._sampled = data.get("sa")
1314+
self._priority = data.get("pr")
1315+
self._sampled = data.get("sa")
12611316

12621317
if "ti" in data:
12631318
transport_start = data["ti"] / 1000.0
@@ -1297,6 +1352,7 @@ def accept_distributed_trace_headers(self, headers, transport_type="HTTP"):
12971352
try:
12981353
traceparent = ensure_str(traceparent).strip()
12991354
data = W3CTraceParent.decode(traceparent)
1355+
self._remote_parent_sampled = data.pop("sa", None)
13001356
except:
13011357
data = None
13021358

@@ -1332,7 +1388,6 @@ def accept_distributed_trace_headers(self, headers, transport_type="HTTP"):
13321388
else:
13331389
self._record_supportability("Supportability/TraceContext/TraceState/NoNrEntry")
13341390

1335-
self._traceparent_sampled = data.get("sa")
13361391
self._accept_distributed_trace_data(data, transport_type)
13371392
self._record_supportability("Supportability/TraceContext/Accept/Success")
13381393
return True

newrelic/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ def _process_configuration(section):
404404
_process_setting(section, "ml_insights_events.enabled", "getboolean", None)
405405
_process_setting(section, "distributed_tracing.enabled", "getboolean", None)
406406
_process_setting(section, "distributed_tracing.exclude_newrelic_header", "getboolean", None)
407+
_process_setting(section, "distributed_tracing.sampler.adaptive_sampling_target", "getint", None)
407408
_process_setting(section, "distributed_tracing.sampler.remote_parent_sampled", "get", None)
408409
_process_setting(section, "distributed_tracing.sampler.remote_parent_not_sampled", "get", None)
409410
_process_setting(section, "span_events.enabled", "getboolean", None)

newrelic/core/agent_protocol.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,9 @@ def _connect_payload(app_name, linked_applications, environment, settings):
297297
connect_settings["browser_monitoring.loader"] = settings["browser_monitoring.loader"]
298298
connect_settings["browser_monitoring.debug"] = settings["browser_monitoring.debug"]
299299
connect_settings["ai_monitoring.enabled"] = settings["ai_monitoring.enabled"]
300+
connect_settings["distributed_tracing.sampler.adaptive_sampling_target"] = settings[
301+
"distributed_tracing.sampler.adaptive_sampling_target"
302+
]
300303

301304
security_settings = {}
302305
security_settings["capture_params"] = settings["capture_params"]

newrelic/core/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,9 @@ def default_otlp_host(host):
836836
_settings.ml_insights_events.enabled = False
837837

838838
_settings.distributed_tracing.enabled = _environ_as_bool("NEW_RELIC_DISTRIBUTED_TRACING_ENABLED", default=True)
839+
_settings.distributed_tracing.sampler.adaptive_sampling_target = _environ_as_int(
840+
"NEW_RELIC_DISTRIBUTED_TRACING_SAMPLER_ADAPTIVE_SAMPLING_TARGET", default=10
841+
)
839842
_settings.distributed_tracing.sampler.remote_parent_sampled = os.environ.get(
840843
"NEW_RELIC_DISTRIBUTED_TRACING_SAMPLER_REMOTE_PARENT_SAMPLED", "default"
841844
)

tests/agent_features/test_distributed_tracing.py

Lines changed: 51 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from newrelic.api.wsgi_application import wsgi_application
4040
from newrelic.core.attribute import Attribute
4141

42+
# ruff: noqa: UP031
4243
distributed_trace_intrinsics = ["guid", "traceId", "priority", "sampled"]
4344
inbound_payload_intrinsics = [
4445
"parent.type",
@@ -419,24 +420,41 @@ def _test_inbound_dt_payload_acceptance():
419420

420421

421422
@pytest.mark.parametrize(
422-
"sampled,remote_parent_sampled,remote_parent_not_sampled,expected_sampled,expected_priority,expected_adaptive_sampling_algo_called",
423+
"traceparent_sampled,newrelic_sampled,remote_parent_sampled_setting,remote_parent_not_sampled_setting,expected_sampled,expected_priority,expected_adaptive_sampling_algo_called",
423424
(
424-
(True, "default", "default", None, None, True), # Uses sampling algo.
425-
(True, "always_on", "default", True, 2, False), # Always sampled.
426-
(True, "always_off", "default", False, 0, False), # Never sampled.
427-
(False, "default", "default", None, None, True), # Uses sampling algo.
428-
(False, "always_on", "default", None, None, True), # Uses sampling alog.
429-
(False, "always_off", "default", None, None, True), # Uses sampling algo.
430-
(True, "default", "always_on", None, None, True), # Uses sampling algo.
431-
(True, "default", "always_off", None, None, True), # Uses sampling algo.
432-
(False, "default", "always_on", True, 2, False), # Always sampled.
433-
(False, "default", "always_off", False, 0, False), # Never sampled.
425+
(True, None, "default", "default", None, None, True), # Uses adaptive sampling algo.
426+
(True, None, "always_on", "default", True, 2, False), # Always sampled.
427+
(True, None, "always_off", "default", False, 0, False), # Never sampled.
428+
(False, None, "default", "default", None, None, True), # Uses adaptive sampling algo.
429+
(False, None, "always_on", "default", None, None, True), # Uses adaptive sampling alog.
430+
(False, None, "always_off", "default", None, None, True), # Uses adaptive sampling algo.
431+
(True, None, "default", "always_on", None, None, True), # Uses adaptive sampling algo.
432+
(True, None, "default", "always_off", None, None, True), # Uses adaptive sampling algo.
433+
(False, None, "default", "always_on", True, 2, False), # Always sampled.
434+
(False, None, "default", "always_off", False, 0, False), # Never sampled.
435+
(True, True, "default", "default", True, 1.23456, False), # Uses sampling decision in W3C TraceState header.
436+
(True, False, "default", "default", False, 1.23456, False), # Uses sampling decision in W3C TraceState header.
437+
(False, False, "default", "default", False, 1.23456, False), # Uses sampling decision in W3C TraceState header.
438+
(True, False, "always_on", "default", True, 2, False), # Always sampled.
439+
(True, True, "always_off", "default", False, 0, False), # Never sampled.
440+
(False, False, "default", "always_on", True, 2, False), # Always sampled.
441+
(False, True, "default", "always_off", False, 0, False), # Never sampled.
442+
(None, True, "default", "default", True, 0.1234, False), # Uses sampling and priority from newrelic header.
443+
(None, True, "always_on", "default", True, 2, False), # Always sampled.
444+
(None, True, "always_off", "default", False, 0, False), # Never sampled.
445+
(None, False, "default", "default", False, 0.1234, False), # Uses sampling and priority from newrelic header.
446+
(None, False, "always_on", "default", False, 0.1234, False), # Uses sampling and priority from newrelic header.
447+
(None, True, "default", "always_on", True, 0.1234, False), # Uses sampling and priority from newrelic header.
448+
(None, False, "default", "always_on", True, 2, False), # Always sampled.
449+
(None, False, "default", "always_off", False, 0, False), # Never sampled.
450+
(None, None, "default", "default", None, None, True), # Uses adaptive sampling algo.
434451
),
435452
)
436-
def test_distributed_trace_w3cparent_sampling_decision(
437-
sampled,
438-
remote_parent_sampled,
439-
remote_parent_not_sampled,
453+
def test_distributed_trace_remote_parent_sampling_decision_full_granularity(
454+
traceparent_sampled,
455+
newrelic_sampled,
456+
remote_parent_sampled_setting,
457+
remote_parent_not_sampled_setting,
440458
expected_sampled,
441459
expected_priority,
442460
expected_adaptive_sampling_algo_called,
@@ -450,18 +468,18 @@ def test_distributed_trace_w3cparent_sampling_decision(
450468
test_settings = _override_settings.copy()
451469
test_settings.update(
452470
{
453-
"distributed_tracing.sampler.remote_parent_sampled": remote_parent_sampled,
454-
"distributed_tracing.sampler.remote_parent_not_sampled": remote_parent_not_sampled,
471+
"distributed_tracing.sampler.remote_parent_sampled": remote_parent_sampled_setting,
472+
"distributed_tracing.sampler.remote_parent_not_sampled": remote_parent_not_sampled_setting,
455473
"span_events.enabled": True,
456474
}
457475
)
458476
if expected_adaptive_sampling_algo_called:
459477
function_called_decorator = validate_function_called(
460-
"newrelic.api.transaction", "Transaction.sampling_algo_compute_sampled_and_priority"
478+
"newrelic.core.adaptive_sampler", "AdaptiveSampler.compute_sampled"
461479
)
462480
else:
463481
function_called_decorator = validate_function_not_called(
464-
"newrelic.api.transaction", "Transaction.sampling_algo_compute_sampled_and_priority"
482+
"newrelic.core.adaptive_sampler", "AdaptiveSampler.compute_sampled"
465483
)
466484

467485
@function_called_decorator
@@ -471,10 +489,20 @@ def test_distributed_trace_w3cparent_sampling_decision(
471489
def _test():
472490
txn = current_transaction()
473491

474-
headers = {
475-
"traceparent": f"00-0af7651916cd43dd8448eb211c80319c-00f067aa0ba902b7-{int(sampled):02x}",
476-
"tracestate": "rojo=f06a0ba902b7,congo=t61rcWkgMzE",
477-
}
492+
if traceparent_sampled is not None:
493+
headers = {
494+
"traceparent": f"00-0af7651916cd43dd8448eb211c80319c-00f067aa0ba902b7-{int(traceparent_sampled):02x}",
495+
"newrelic": '{"v":[0,1],"d":{"ty":"Mobile","ac":"123","ap":"51424","id":"5f474d64b9cc9b2a","tr":"6e2fea0b173fdad0","pr":0.1234,"sa":true,"ti":1482959525577,"tx":"27856f70d3d314b7"}}', # This header should be ignored.
496+
}
497+
if newrelic_sampled is not None:
498+
headers["tracestate"] = (
499+
f"1@nr=0-0-1-2827902-0af7651916cd43dd-00f067aa0ba902b7-{int(newrelic_sampled)}-1.23456-1518469636035"
500+
)
501+
else:
502+
headers = {
503+
"newrelic": '{"v":[0,1],"d":{"ty":"Mobile","ac":"1","ap":"51424","id":"00f067aa0ba902b7","tr":"0af7651916cd43dd8448eb211c80319c","pr":0.1234,"sa":%s,"ti":1482959525577,"tx":"0af7651916cd43dd"}}'
504+
% (str(newrelic_sampled).lower())
505+
}
478506
accept_distributed_trace_headers(headers)
479507

480508
_test()

tests/agent_unittests/test_agent_protocol.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,10 +278,11 @@ def connect_payload_asserts(
278278
assert len(payload_data["security_settings"]) == 2
279279
assert payload_data["security_settings"]["capture_params"] == CAPTURE_PARAMS
280280
assert payload_data["security_settings"]["transaction_tracer"] == {"record_sql": RECORD_SQL}
281-
assert len(payload_data["settings"]) == 3
281+
assert len(payload_data["settings"]) == 4
282282
assert payload_data["settings"]["browser_monitoring.loader"] == (BROWSER_MONITORING_LOADER)
283283
assert payload_data["settings"]["browser_monitoring.debug"] == (BROWSER_MONITORING_DEBUG)
284284
assert payload_data["settings"]["ai_monitoring.enabled"] is False
285+
assert payload_data["settings"]["distributed_tracing.sampler.adaptive_sampling_target"] == 10
285286

286287
utilization_len = 5
287288

0 commit comments

Comments
 (0)