Skip to content

Commit 78c67c8

Browse files
Swatinem authored and andrewshie-sentry committed
Allow multiple dogstatsd instances, record precise timing distributions (#97662)
This fixes the `DogStatsdMetricsBackend` so that it no longer depends on the globally configured `initialize`/`statsd` singleton and instead configures its own statsd instance. It also now captures precise timings as `distribution` metrics, because the existing `timing` type has special handling within datadog/agent that once again splits the metric into multiple pre-aggregations.
1 parent 5684275 commit 78c67c8

File tree

8 files changed

+52
-35
lines changed

8 files changed

+52
-35
lines changed

src/sentry/metrics/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def timing(
5252
tags: Tags | None = None,
5353
sample_rate: float = 1,
5454
stacklevel: int = 0,
55+
precise: bool = False,
5556
) -> None:
5657
raise NotImplementedError
5758

src/sentry/metrics/datadog.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def timing(
6464
tags: Tags | None = None,
6565
sample_rate: float = 1,
6666
stacklevel: int = 0,
67+
precise: bool = False,
6768
) -> None:
6869
tags = dict(tags or ())
6970

@@ -73,9 +74,14 @@ def timing(
7374
tags["instance"] = instance
7475

7576
tags_list = [f"{k}:{v}" for k, v in tags.items()]
76-
self.stats.timing(
77-
self._get_key(key), value, sample_rate=sample_rate, tags=tags_list, host=self.host
78-
)
77+
if not precise:
78+
self.stats.timing(
79+
self._get_key(key), value, sample_rate=sample_rate, tags=tags_list, host=self.host
80+
)
81+
else:
82+
self.stats.distribution(
83+
self._get_key(key), value, sample_rate=sample_rate, tags=tags_list, host=self.host
84+
)
7985

8086
def gauge(
8187
self,

src/sentry/metrics/dogstatsd.py

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,41 @@
11
import atexit
22
from typing import Any
33

4-
from datadog import initialize
5-
from datadog.dogstatsd.base import statsd
4+
from datadog.dogstatsd.base import DogStatsd
65

76
from .base import MetricsBackend, Tags
87

98
__all__ = ["DogStatsdMetricsBackend"]
109

11-
# Set the maximum number of packets to queue for the sender.
12-
# How many packets to queue before blocking or dropping the packet if the packet queue is already full.
13-
# 0 means unlimited.
14-
SENDER_QUEUE_SIZE = 0
15-
16-
# Set timeout for packet queue operations, in seconds
17-
# How long the application thread is willing to wait for the queue to clear up before dropping the metric packet.
18-
# If set to None, wait forever.
19-
# If set to zero drop the packet immediately if the queue is full.
20-
SENDER_QUEUE_TIMEOUT = 0
21-
2210

2311
class DogStatsdMetricsBackend(MetricsBackend):
2412
def __init__(self, prefix: str | None = None, **kwargs: Any) -> None:
25-
# TODO(dcramer): it'd be nice if the initialize call wasn't a global
2613
self.tags = kwargs.pop("tags", None)
27-
kwargs["statsd_disable_buffering"] = False
28-
29-
initialize(**kwargs)
30-
statsd.disable_telemetry()
3114

32-
# When enabled, a background thread will be used to send metric payloads to the Agent.
33-
statsd.enable_background_sender(
34-
sender_queue_size=SENDER_QUEUE_SIZE, sender_queue_timeout=SENDER_QUEUE_TIMEOUT
35-
)
36-
# Applications should call wait_for_pending() before exiting to make sure all pending payloads are sent.
37-
atexit.register(statsd.wait_for_pending)
15+
instance_kwargs: dict[str, Any] = {
16+
"disable_telemetry": True,
17+
"disable_buffering": False,
18+
# When enabled, a background thread will be used to send metric payloads to the Agent.
19+
"disable_background_sender": False,
20+
}
21+
if socket_path := kwargs.get("statsd_socket_path"):
22+
instance_kwargs["socket_path"] = socket_path
23+
else:
24+
if host := kwargs.get("statsd_host"):
25+
instance_kwargs["host"] = host
26+
if port := kwargs.get("statsd_port"):
27+
instance_kwargs["port"] = int(port)
28+
29+
self.statsd = DogStatsd(**instance_kwargs)
3830

3931
# Origin detection is enabled after 0.45 by default.
4032
# Disable it since it silently fails.
4133
# Ref: https://github.com/DataDog/datadogpy/issues/764
42-
statsd._container_id = None
34+
self.statsd._container_id = None
35+
36+
# Applications should call wait_for_pending() before exiting to make sure all pending payloads are sent.
37+
atexit.register(self.statsd.wait_for_pending)
38+
4339
super().__init__(prefix=prefix)
4440

4541
def incr(
@@ -60,7 +56,7 @@ def incr(
6056
tags["instance"] = instance
6157

6258
tags_list = [f"{k}:{v}" for k, v in tags.items()]
63-
statsd.increment(self._get_key(key), amount, sample_rate=sample_rate, tags=tags_list)
59+
self.statsd.increment(self._get_key(key), amount, sample_rate=sample_rate, tags=tags_list)
6460

6561
def timing(
6662
self,
@@ -70,6 +66,7 @@ def timing(
7066
tags: Tags | None = None,
7167
sample_rate: float = 1,
7268
stacklevel: int = 0,
69+
precise: bool = False,
7370
) -> None:
7471
tags = dict(tags or ())
7572

@@ -79,7 +76,12 @@ def timing(
7976
tags["instance"] = instance
8077

8178
tags_list = [f"{k}:{v}" for k, v in tags.items()]
82-
statsd.timing(self._get_key(key), value, sample_rate=sample_rate, tags=tags_list)
79+
if not precise:
80+
self.statsd.timing(self._get_key(key), value, sample_rate=sample_rate, tags=tags_list)
81+
else:
82+
self.statsd.distribution(
83+
self._get_key(key), value, sample_rate=sample_rate, tags=tags_list
84+
)
8385

8486
def gauge(
8587
self,
@@ -99,7 +101,7 @@ def gauge(
99101
tags["instance"] = instance
100102

101103
tags_list = [f"{k}:{v}" for k, v in tags.items()]
102-
statsd.gauge(self._get_key(key), value, sample_rate=sample_rate, tags=tags_list)
104+
self.statsd.gauge(self._get_key(key), value, sample_rate=sample_rate, tags=tags_list)
103105

104106
def distribution(
105107
self,
@@ -124,7 +126,7 @@ def distribution(
124126
tags["instance"] = instance
125127

126128
tags_list = [f"{k}:{v}" for k, v in tags.items()]
127-
statsd.distribution(self._get_key(key), value, sample_rate=sample_rate, tags=tags_list)
129+
self.statsd.distribution(self._get_key(key), value, sample_rate=sample_rate, tags=tags_list)
128130

129131
def event(
130132
self,
@@ -146,7 +148,7 @@ def event(
146148
tags["instance"] = instance
147149

148150
tags_list = [f"{k}:{v}" for k, v in tags.items()]
149-
statsd.event(
151+
self.statsd.event(
150152
title=title,
151153
message=message,
152154
alert_type=alert_type,

src/sentry/metrics/dummy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def timing(
2424
tags: Tags | None = None,
2525
sample_rate: float = 1,
2626
stacklevel: int = 0,
27+
precise: bool = False,
2728
) -> None:
2829
pass
2930

src/sentry/metrics/logging.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def timing(
2626
tags: Tags | None = None,
2727
sample_rate: float = 1,
2828
stacklevel: int = 0,
29+
precise: bool = False,
2930
) -> None:
3031
logger.debug(
3132
"%r: %g ms", key, value * 1000, extra={"instance": instance, "tags": tags or {}}

src/sentry/metrics/middleware.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,16 @@ def timing(
144144
tags: Tags | None = None,
145145
sample_rate: float = 1,
146146
stacklevel: int = 0,
147+
precise: bool = False,
147148
) -> None:
148149
current_tags = get_current_global_tags()
149150
if tags is not None:
150151
current_tags.update(tags)
151152
current_tags = _filter_tags(key, current_tags)
152153

153-
return self.inner.timing(key, value, instance, current_tags, sample_rate, stacklevel + 1)
154+
return self.inner.timing(
155+
key, value, instance, current_tags, sample_rate, stacklevel + 1, precise
156+
)
154157

155158
def gauge(
156159
self,

src/sentry/metrics/statsd.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def timing(
3737
tags: Tags | None = None,
3838
sample_rate: float = 1,
3939
stacklevel: int = 0,
40+
precise: bool = False,
4041
) -> None:
4142
self.client.timing(self._full_key(self._get_key(key)), value, sample_rate)
4243

src/sentry/utils/metrics.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,9 @@ def timing(
178178

179179
if precise and precise_backend:
180180
try:
181-
precise_backend.timing(key, value, instance, tags, sample_rate, stacklevel + 1)
181+
precise_backend.timing(
182+
key, value, instance, tags, sample_rate, stacklevel + 1, precise=True
183+
)
182184
except Exception:
183185
logger = logging.getLogger("sentry.errors")
184186
logger.exception("Unable to record precise metric")

0 commit comments

Comments
 (0)