Skip to content

Commit d0a3adc

Browse files
authored
feat(internal): span sampling file config envar (backport #4042) (#4073)
This is an automatic backport of pull request #4042 done by [Mergify](https://mergify.com).

Cherry-pick of c0e9a9b has failed:

```
On branch mergify/bp/1.4/pr-4042
Your branch is up to date with 'origin/1.4'.

You are currently cherry-picking commit c0e9a9b.
  (fix conflicts and run "git cherry-pick --continue")
  (use "git cherry-pick --skip" to skip this patch)
  (use "git cherry-pick --abort" to cancel the cherry-pick operation)

Changes to be committed:
  modified: ddtrace/constants.py

Unmerged paths:
  (use "git add/rm <file>..." as appropriate to mark resolution)
  both modified: ddtrace/internal/sampling.py
  deleted by us: tests/tracer/test_single_span_sampling_rules.py
```

To fix up this pull request, you can check it out locally. See documentation: https://docs.github.com/en/github/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/checking-out-pull-requests-locally

---

<details>
<summary>Mergify commands and options</summary>

<br />

More conditions and actions can be found in the [documentation](https://docs.mergify.com/).

You can also trigger Mergify actions by commenting on this pull request:

- `@Mergifyio refresh` will re-evaluate the rules
- `@Mergifyio rebase` will rebase this PR on its base branch
- `@Mergifyio update` will merge the base branch into this PR
- `@Mergifyio backport <destination>` will backport this PR on `<destination>` branch

Additionally, on Mergify [dashboard](https://dashboard.mergify.com/) you can:

- look at your merge queues
- generate the Mergify configuration with the config editor.

Finally, you can contact us on https://mergify.com
</details>
1 parent a0b01ec commit d0a3adc

File tree

3 files changed

+152
-41
lines changed

3 files changed

+152
-41
lines changed

ddtrace/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
_SINGLE_SPAN_SAMPLING_MECHANISM = "_dd.span_sampling.mechanism"
88
_SINGLE_SPAN_SAMPLING_RATE = "_dd.span_sampling.rule_rate"
99
_SINGLE_SPAN_SAMPLING_MAX_PER_SEC = "_dd.span_sampling.max_per_second"
10+
_SINGLE_SPAN_SAMPLING_MAX_PER_SEC_NO_LIMIT = -1
1011

1112
ORIGIN_KEY = "_dd.origin"
1213
HOSTNAME_KEY = "_dd.hostname"

ddtrace/internal/sampling.py

Lines changed: 79 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from typing import Optional
55
from typing import TYPE_CHECKING
66

7+
from six import string_types
8+
79

810
# TypedDict was added to typing in python 3.8
911
try:
@@ -12,6 +14,7 @@
1214
from typing_extensions import TypedDict
1315

1416
from ddtrace.constants import _SINGLE_SPAN_SAMPLING_MAX_PER_SEC
17+
from ddtrace.constants import _SINGLE_SPAN_SAMPLING_MAX_PER_SEC_NO_LIMIT
1518
from ddtrace.constants import _SINGLE_SPAN_SAMPLING_MECHANISM
1619
from ddtrace.constants import _SINGLE_SPAN_SAMPLING_RATE
1720
from ddtrace.internal.glob_matching import GlobMatcher
@@ -29,6 +32,7 @@
2932
JSONDecodeError = ValueError # type: ignore
3033

3134
if TYPE_CHECKING:
35+
from typing import Any
3236
from typing import Dict
3337
from typing import List
3438
from typing import Text
@@ -208,44 +212,82 @@ def apply_span_sampling_tags(self, span):
208212

209213
def get_span_sampling_rules():
    # type: () -> List[SpanSamplingRule]
    """Build the single span sampling rules from the configured JSON.

    The decoded JSON entries come from ``_get_span_sampling_json()``; each
    entry is validated and converted in order, so the first invalid entry
    aborts the whole load.

    :returns: one ``SpanSamplingRule`` per configured entry (possibly empty).
    :raises TypeError: if an entry is not a dictionary.
    :raises ValueError: if an entry has a non-string ``service``/``name``,
        specifies neither of them, or cannot be turned into a rule.
    """
    return [_build_span_sampling_rule(rule_config) for rule_config in _get_span_sampling_json()]


def _build_span_sampling_rule(rule):
    # type: (Any) -> SpanSamplingRule
    """Validate one decoded JSON entry and convert it into a ``SpanSamplingRule``."""
    if not isinstance(rule, dict):
        raise TypeError("rule specified via DD_SPAN_SAMPLING_RULES is not a dictionary:%r" % rule)

    # A missing sample_rate means the rule samples 100% of matching spans
    rate = float(rule.get("sample_rate", 1.0))
    # A missing max_per_second means the rule is not rate limited
    limit = int(rule.get("max_per_second", _SINGLE_SPAN_SAMPLING_MAX_PER_SEC_NO_LIMIT))
    service_pattern = rule.get("service")
    name_pattern = rule.get("name")

    if not (service_pattern is None or isinstance(service_pattern, string_types)):
        raise ValueError("The service value is not a string or None:%r" % service_pattern)
    if not (name_pattern is None or isinstance(name_pattern, string_types)):
        raise ValueError("The name value is not a string or None:%r" % name_pattern)

    # A rule has to match on at least one of service / name
    if service_pattern is None and name_pattern is None:
        raise ValueError("Neither service or name specified for single span sampling rule:%r" % rule)

    # Empty strings are skipped here, exactly like None
    for pattern in (service_pattern, name_pattern):
        if pattern:
            _check_unsupported_pattern(pattern)

    try:
        return SpanSamplingRule(
            sample_rate=rate,
            service=service_pattern,
            name=name_pattern,
            max_per_second=limit,
        )
    except Exception as e:
        # Re-raise with the offending rule attached so the bad entry is identifiable
        raise ValueError("Error creating single span sampling rule {}: {}".format(json.dumps(rule), e))
246+
247+
248+
def _get_span_sampling_json():
    # type: () -> List[Dict[str, Any]]
    """Return the decoded span sampling rules, preferring the environment variable.

    Both ``DD_SPAN_SAMPLING_RULES`` and ``DD_SPAN_SAMPLING_RULES_FILE`` are
    always read. When both yield a non-empty rule list a warning is logged and
    the environment variable's rules are used. When neither yields rules an
    empty list is returned.
    """
    rules_from_env = _get_env_json()
    rules_from_file = _get_file_json()

    if not (rules_from_env and rules_from_file):
        # At most one source produced rules: use it, or fall back to no rules
        return rules_from_env or rules_from_file or []

    log.warning(
        (
            "DD_SPAN_SAMPLING_RULES and DD_SPAN_SAMPLING_RULES_FILE detected. "
            "Defaulting to DD_SPAN_SAMPLING_RULES value."
        )
    )
    return rules_from_env
262+
263+
264+
def _get_file_json():
    # type: () -> Optional[List[Dict[str, Any]]]
    """Load span sampling rules from the file named by ``DD_SPAN_SAMPLING_RULES_FILE``.

    :returns: the decoded list of rules, or ``None`` when the variable is
        unset or empty.
    :raises IOError: (``FileNotFoundError`` on Python 3) if the file cannot be opened.
    :raises TypeError: if the file's JSON document is not a list.
    :raises ValueError: if the file does not contain valid JSON.
    """
    rules_path = os.getenv("DD_SPAN_SAMPLING_RULES_FILE")
    if not rules_path:
        return None
    with open(rules_path) as rules_file:
        raw_json_rules = rules_file.read()
    # Name the file (not the env var holding inline JSON) in any error raised
    return _load_span_sampling_json(raw_json_rules, source="DD_SPAN_SAMPLING_RULES_FILE (%s)" % rules_path)


def _get_env_json():
    # type: () -> Optional[List[Dict[str, Any]]]
    """Load span sampling rules from the inline JSON in ``DD_SPAN_SAMPLING_RULES``.

    :returns: the decoded list of rules, or ``None`` when the variable is
        unset or empty.
    :raises TypeError: if the JSON document is not a list.
    :raises ValueError: if the value is not valid JSON.
    """
    env_json_raw = os.getenv("DD_SPAN_SAMPLING_RULES")
    if not env_json_raw:
        return None
    return _load_span_sampling_json(env_json_raw)


def _load_span_sampling_json(raw_json_rules, source="DD_SPAN_SAMPLING_RULES"):
    # type: (str, str) -> List[Dict[str, Any]]
    """Decode a JSON document that must be a list of span sampling rules.

    :param raw_json_rules: the JSON text to decode.
    :param source: label of the configuration source, used only in error
        messages. Defaults to the environment variable name so existing
        callers get the same messages as before.
    :returns: the decoded list.
    :raises TypeError: if the decoded document is not a list.
    :raises ValueError: if ``raw_json_rules`` is not valid JSON.
    """
    try:
        json_rules = json.loads(raw_json_rules)
    except JSONDecodeError:
        raise ValueError("Unable to parse %s=%r" % (source, raw_json_rules))

    if not isinstance(json_rules, list):
        raise TypeError("%s is not list, got %r" % (source, json_rules))

    return json_rules
249291

250292

251293
def _check_unsupported_pattern(string):

tests/tracer/test_single_span_sampling_rules.py

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
import pytest
24

35
from ddtrace import Tracer
@@ -6,6 +8,7 @@
68
from ddtrace.constants import _SINGLE_SPAN_SAMPLING_MECHANISM
79
from ddtrace.constants import _SINGLE_SPAN_SAMPLING_RATE
810
from ddtrace.internal.sampling import SamplingMechanism
11+
from ddtrace.internal.sampling import _get_file_json
912
from ddtrace.internal.sampling import get_span_sampling_rules
1013
from tests.utils import DummyWriter
1114

@@ -75,7 +78,23 @@ def test_sampling_rule_init_via_env():
7578
sampling_rules = get_span_sampling_rules()
7679

7780

78-
def test_rules_sample_span():
81+
def test_json_not_list_error():
    """A DD_SPAN_SAMPLING_RULES value that is a JSON object, not a list, raises TypeError"""
    not_a_list = '{"sample_rate":0.5,"service":"xyz","name":"abc","max_per_second":100}'
    with override_env(dict(DD_SPAN_SAMPLING_RULES=not_a_list)), pytest.raises(TypeError):
        get_span_sampling_rules()
87+
88+
89+
def test_json_decode_error_throws_ValueError():
    """A DD_SPAN_SAMPLING_RULES value that is not valid JSON raises ValueError"""
    # The closing brace is deliberately missing
    truncated_json = '{"sample_rate":0.5,"service":"xyz","name":"abc","max_per_second":100'
    with override_env(dict(DD_SPAN_SAMPLING_RULES=truncated_json)), pytest.raises(ValueError):
        get_span_sampling_rules()
95+
96+
97+
def test_rules_sample_span_via_env():
7998
"""Test that single span sampling tags are applied to spans that should get sampled when envars set"""
8099
with override_env(dict(DD_SPAN_SAMPLING_RULES='[{"service":"test_service","name":"test_name"}]')):
81100
sampling_rules = get_span_sampling_rules()
@@ -89,7 +108,7 @@ def test_rules_sample_span():
89108
assert_sampling_decision_tags(span)
90109

91110

92-
def test_rules_do_not_sample_wrong_span():
111+
def test_rules_do_not_sample_wrong_span_via_env():
93112
"""Test that single span sampling tags are not applied to spans that do not match rules"""
94113
with override_env(dict(DD_SPAN_SAMPLING_RULES='[{"service":"test_ser","name":"test_na"}]')):
95114
sampling_rules = get_span_sampling_rules()
@@ -103,7 +122,7 @@ def test_rules_do_not_sample_wrong_span():
103122
assert_sampling_decision_tags(span, sample_rate=None, mechanism=None, limit=None)
104123

105124

106-
def test_single_span_rules_do_not_tag_if_tracer_samples():
125+
def test_single_span_rules_do_not_tag_if_tracer_samples_via_env():
107126
"""Test that single span sampling rules aren't applied if a span is already going to be sampled by trace sampler"""
108127
with override_env(dict(DD_SPAN_SAMPLING_RULES='[{"service":"test_service","name":"test_name"}]')):
109128
sampling_rules = get_span_sampling_rules()
@@ -119,6 +138,55 @@ def test_single_span_rules_do_not_tag_if_tracer_samples():
119138
assert_sampling_decision_tags(span, sample_rate=None, mechanism=None, limit=None, trace_sampling=True)
120139

121140

141+
def test_sampling_rule_init_config_multiple_sampling_rule_json_via_file(tmpdir):
    """Rules written to the file named by DD_SPAN_SAMPLING_RULES_FILE are decoded as-is"""
    rules_file = tmpdir.join("rules.json")
    rules_file.write(
        '[{"service":"xy?","name":"a*c"}, \
        {"sample_rate":0.5,"service":"my-service","name":"my-name", "max_per_second":"20"}]'
    )
    # _get_file_json only decodes the JSON, so "max_per_second" stays a string here
    expected_rules = [
        {"service": "xy?", "name": "a*c"},
        {"sample_rate": 0.5, "service": "my-service", "name": "my-name", "max_per_second": "20"},
    ]

    with override_env(dict(DD_SPAN_SAMPLING_RULES_FILE=str(rules_file))):
        loaded_rules = _get_file_json()
        assert loaded_rules == expected_rules
154+
155+
156+
def test_wrong_file_path(tmpdir):
    """Test that a DD_SPAN_SAMPLING_RULES_FILE path that does not exist raises a file-not-found error"""
    with override_env(dict(DD_SPAN_SAMPLING_RULES_FILE="data/this_doesnt_exist.json")):
        # FileNotFoundError only exists on Python 3; Python 2 raises IOError instead.
        # The conditional expression is lazy, so the Python 3 name is never evaluated on Python 2.
        exception = FileNotFoundError if sys.version_info.major >= 3 else IOError
        with pytest.raises(exception):
            get_span_sampling_rules()
162+
163+
164+
def test_default_to_env_if_both_env_and_file_config(tmpdir, caplog):
    """When both the env var and the rules file are set, a warning is logged and the env var rules win"""
    rules_file = tmpdir.join("rules.json")
    rules_file.write('[{"sample_rate":1.0,"service":"x","name":"ab","max_per_second":1000}]')

    both_sources = {
        "DD_SPAN_SAMPLING_RULES_FILE": str(rules_file),
        "DD_SPAN_SAMPLING_RULES": '[{"sample_rate":0.5,"service":"xyz","name":"abc","max_per_second":100}]',
    }
    with override_env(both_sources):
        sampling_rules = get_span_sampling_rules()

        # 30 is the numeric value of logging.WARNING
        expected_warning = (
            "ddtrace.internal.sampling",
            30,
            "DD_SPAN_SAMPLING_RULES and DD_SPAN_SAMPLING_RULES_FILE detected. "
            "Defaulting to DD_SPAN_SAMPLING_RULES value.",
        )
        assert caplog.record_tuples == [expected_warning]

        # Only the rule from the environment variable must have been loaded
        assert len(sampling_rules) == 1
        env_rule = sampling_rules[0]
        assert env_rule._sample_rate == 0.5
        assert env_rule._service_matcher.pattern == "xyz"
        assert env_rule._name_matcher.pattern == "abc"
        assert env_rule._max_per_second == 100
188+
189+
122190
def traced_function(tracer, name="test_name", service="test_service", trace_sampling=False):
123191
with tracer.trace(name) as span:
124192
# If the trace sampler samples the trace, then we shouldn't add the span sampling tags
@@ -139,4 +207,4 @@ def assert_sampling_decision_tags(
139207
assert span.get_metric(_SINGLE_SPAN_SAMPLING_MAX_PER_SEC) == limit
140208

141209
if trace_sampling:
142-
assert span.get_metric(SAMPLING_PRIORITY_KEY) >= 0
210+
assert span.get_metric(SAMPLING_PRIORITY_KEY) > 0

0 commit comments

Comments
 (0)