Skip to content

Commit 98abdef

Browse files
authored
Allow double-sending metrics to another "precise" backend (#97473)
We are using an in-between service that pre-aggregates distribution-like (distribution and timer) metrics. This pre-aggregation is bad, and we would like to get rid of both the pre-aggregation and the service doing it. As a first step, this allows configuring a second "precise" metrics backend, and opting into sending "precise" metrics to that second backend if it is configured. See also getsentry/getsentry#18133.
1 parent 3265644 commit 98abdef

File tree

10 files changed

+97
-16
lines changed

10 files changed

+97
-16
lines changed

src/sentry/conf/server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2206,6 +2206,8 @@ def custom_parameter_sort(parameter: dict) -> tuple[str, int]:
22062206
# Internal metrics
22072207
SENTRY_METRICS_BACKEND = "sentry.metrics.dummy.DummyMetricsBackend"
22082208
SENTRY_METRICS_OPTIONS: dict[str, Any] = {}
2209+
SENTRY_METRICS_PRECISE_BACKEND: str | None = None
2210+
SENTRY_METRICS_PRECISE_OPTIONS: dict[str, Any] = {}
22092211
SENTRY_METRICS_SAMPLE_RATE = 1.0
22102212
SENTRY_METRICS_PREFIX = "sentry."
22112213
SENTRY_METRICS_SKIP_INTERNAL_PREFIXES: list[str] = [] # Order this by most frequent prefixes.

src/sentry/metrics/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def distribution(
7676
sample_rate: float = 1,
7777
unit: str | None = None,
7878
stacklevel: int = 0,
79+
precise: bool = False,
7980
) -> None:
8081
raise NotImplementedError
8182

src/sentry/metrics/datadog.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,23 @@ def distribution(
108108
sample_rate: float = 1,
109109
unit: str | None = None,
110110
stacklevel: int = 0,
111+
precise: bool = False,
111112
) -> None:
112-
# We keep the same implementation for Datadog.
113-
self.timing(key, value, instance, tags, sample_rate)
113+
if not precise:
114+
# We keep the same implementation for Datadog.
115+
return self.timing(key, value, instance, tags, sample_rate)
116+
117+
tags = dict(tags or ())
118+
119+
if self.tags:
120+
tags.update(self.tags)
121+
if instance:
122+
tags["instance"] = instance
123+
124+
tags_list = [f"{k}:{v}" for k, v in tags.items()]
125+
self.stats.distribution(
126+
self._get_key(key), value, sample_rate=sample_rate, tags=tags_list, host=self.host
127+
)
114128

115129
def event(
116130
self,

src/sentry/metrics/dogstatsd.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,21 @@ def distribution(
110110
sample_rate: float = 1,
111111
unit: str | None = None,
112112
stacklevel: int = 0,
113+
precise: bool = False,
113114
) -> None:
114-
# We keep the same implementation for Datadog.
115-
self.timing(key, value, instance, tags, sample_rate)
115+
if not precise:
116+
# We keep the same implementation for Datadog.
117+
return self.timing(key, value, instance, tags, sample_rate)
118+
119+
tags = dict(tags or ())
120+
121+
if self.tags:
122+
tags.update(self.tags)
123+
if instance:
124+
tags["instance"] = instance
125+
126+
tags_list = [f"{k}:{v}" for k, v in tags.items()]
127+
statsd.distribution(self._get_key(key), value, sample_rate=sample_rate, tags=tags_list)
116128

117129
def event(
118130
self,

src/sentry/metrics/dummy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def distribution(
4848
sample_rate: float = 1,
4949
unit: str | None = None,
5050
stacklevel: int = 0,
51+
precise: bool = False,
5152
) -> None:
5253
pass
5354

src/sentry/metrics/logging.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def distribution(
5252
sample_rate: float = 1,
5353
unit: str | None = None,
5454
stacklevel: int = 0,
55+
precise: bool = False,
5556
) -> None:
5657
logger.debug("%r: %+g", key, value, extra={"instance": instance, "tags": tags or {}})
5758

src/sentry/metrics/middleware.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,15 @@ def distribution(
180180
sample_rate: float = 1,
181181
unit: str | None = None,
182182
stacklevel: int = 0,
183+
precise: bool = False,
183184
) -> None:
184185
current_tags = get_current_global_tags()
185186
if tags is not None:
186187
current_tags.update(tags)
187188
current_tags = _filter_tags(key, current_tags)
188189

189190
return self.inner.distribution(
190-
key, value, instance, current_tags, sample_rate, unit, stacklevel + 1
191+
key, value, instance, current_tags, sample_rate, unit, stacklevel + 1, precise
191192
)
192193

193194
def event(

src/sentry/metrics/statsd.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ def distribution(
6161
sample_rate: float = 1,
6262
unit: str | None = None,
6363
stacklevel: int = 0,
64+
precise: bool = False,
6465
) -> None:
66+
# NOTE: the statsd client does not have a `distribution` method
6567
self.timing(key, value, instance, tags, sample_rate)
6668

6769
def event(

src/sentry/utils/metrics.py

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
"timing",
3232
"gauge",
3333
"backend",
34+
"precise_backend", # just for mocking in tests
3435
"MutableTags",
3536
"ensure_crash_rate_in_bounds",
3637
]
@@ -40,22 +41,21 @@
4041
F = TypeVar("F", bound=Callable[..., Any])
4142

4243

43-
def get_default_backend() -> MetricsBackend:
44+
def get_default_backend() -> tuple[MetricsBackend, MetricsBackend | None]:
4445
from sentry.utils.imports import import_string
4546

46-
cls: type[MetricsBackend] = import_string(settings.SENTRY_METRICS_BACKEND)
47+
default_cls: type[MetricsBackend] = import_string(settings.SENTRY_METRICS_BACKEND)
48+
default_backend = MiddlewareWrapper(default_cls(**settings.SENTRY_METRICS_OPTIONS))
4749

48-
return MiddlewareWrapper(cls(**settings.SENTRY_METRICS_OPTIONS))
50+
precise_backend = None
51+
if precise_import := settings.SENTRY_METRICS_PRECISE_BACKEND:
52+
precise_cls: type[MetricsBackend] = import_string(precise_import)
53+
precise_backend = MiddlewareWrapper(precise_cls(**settings.SENTRY_METRICS_PRECISE_OPTIONS))
4954

55+
return default_backend, precise_backend
5056

51-
backend = get_default_backend()
5257

53-
54-
def _get_key(key: str) -> str:
55-
prefix = settings.SENTRY_METRICS_PREFIX
56-
if prefix:
57-
return f"{prefix}{key}"
58-
return key
58+
backend, precise_backend = get_default_backend()
5959

6060

6161
def _should_sample(sample_rate: float) -> bool:
@@ -168,13 +168,21 @@ def timing(
168168
tags: Tags | None = None,
169169
sample_rate: float = settings.SENTRY_METRICS_SAMPLE_RATE,
170170
stacklevel: int = 0,
171+
precise: bool = False,
171172
) -> None:
172173
try:
173174
backend.timing(key, value, instance, tags, sample_rate, stacklevel + 1)
174175
except Exception:
175176
logger = logging.getLogger("sentry.errors")
176177
logger.exception("Unable to record backend metric")
177178

179+
if precise and precise_backend:
180+
try:
181+
precise_backend.timing(key, value, instance, tags, sample_rate, stacklevel + 1)
182+
except Exception:
183+
logger = logging.getLogger("sentry.errors")
184+
logger.exception("Unable to record precise metric")
185+
178186

179187
def distribution(
180188
key: str,
@@ -184,13 +192,23 @@ def distribution(
184192
sample_rate: float = settings.SENTRY_METRICS_SAMPLE_RATE,
185193
unit: str | None = None,
186194
stacklevel: int = 0,
195+
precise: bool = False,
187196
) -> None:
188197
try:
189198
backend.distribution(key, value, instance, tags, sample_rate, unit, stacklevel + 1)
190199
except Exception:
191200
logger = logging.getLogger("sentry.errors")
192201
logger.exception("Unable to record backend metric")
193202

203+
if precise and precise_backend:
204+
try:
205+
precise_backend.distribution(
206+
key, value, instance, tags, sample_rate, unit, stacklevel + 1, precise=True
207+
)
208+
except Exception:
209+
logger = logging.getLogger("sentry.errors")
210+
logger.exception("Unable to record precise metric")
211+
194212

195213
@contextmanager
196214
def timer(
@@ -199,6 +217,7 @@ def timer(
199217
tags: Tags | None = None,
200218
sample_rate: float = settings.SENTRY_METRICS_SAMPLE_RATE,
201219
stacklevel: int = 0,
220+
precise: bool = False,
202221
) -> Generator[MutableTags]:
203222
start = time.monotonic()
204223
current_tags: MutableTags = dict(tags or ())
@@ -211,7 +230,15 @@ def timer(
211230
current_tags["result"] = "success"
212231
finally:
213232
# stacklevel must be increased by 2 because of the contextmanager indirection
214-
timing(key, time.monotonic() - start, instance, current_tags, sample_rate, stacklevel + 2)
233+
timing(
234+
key,
235+
time.monotonic() - start,
236+
instance,
237+
current_tags,
238+
sample_rate,
239+
stacklevel + 2,
240+
precise,
241+
)
215242

216243

217244
def wraps(
@@ -220,6 +247,7 @@ def wraps(
220247
tags: Tags | None = None,
221248
sample_rate: float = settings.SENTRY_METRICS_SAMPLE_RATE,
222249
stacklevel: int = 0,
250+
precise: bool = False,
223251
) -> Callable[[F], F]:
224252
def wrapper(f: F) -> F:
225253
@functools.wraps(f)
@@ -230,6 +258,7 @@ def inner(*args: Any, **kwargs: Any) -> Any:
230258
tags=tags,
231259
sample_rate=sample_rate,
232260
stacklevel=stacklevel + 1,
261+
precise=precise,
233262
):
234263
return f(*args, **kwargs)
235264

tests/sentry/metrics/test_precise.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from unittest import mock
2+
3+
from sentry.utils import metrics
4+
5+
6+
@mock.patch("sentry.utils.metrics.precise_backend")
7+
@mock.patch("sentry.utils.metrics.backend")
8+
def test_precise_distribution(backend, precise):
9+
metrics.distribution("foo", 100, tags={"some": "stuff"}, unit="byte")
10+
11+
backend.distribution.assert_called_once()
12+
precise.distribution.assert_not_called()
13+
backend.reset_mock()
14+
15+
metrics.distribution("foo", 100, tags={"some": "stuff"}, unit="byte", precise=True)
16+
17+
backend.distribution.assert_called_once()
18+
precise.distribution.assert_called_once()

0 commit comments

Comments
 (0)