-
Notifications
You must be signed in to change notification settings - Fork 562
feat(metrics): Add trace metrics behind an experiments flag #4898
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,157 @@ | ||||||
import os | ||||||
import random | ||||||
import threading | ||||||
from datetime import datetime, timezone | ||||||
from typing import Optional, List, Callable, TYPE_CHECKING, Any | ||||||
|
||||||
from sentry_sdk.utils import format_timestamp, safe_repr | ||||||
from sentry_sdk.envelope import Envelope, Item, PayloadRef | ||||||
|
||||||
if TYPE_CHECKING: | ||||||
from sentry_sdk._types import TraceMetric | ||||||
|
||||||
|
||||||
class TraceMetricsBatcher: | ||||||
MAX_METRICS_BEFORE_FLUSH = 100 | ||||||
FLUSH_WAIT_TIME = 5.0 | ||||||
|
||||||
def __init__( | ||||||
self, | ||||||
capture_func, # type: Callable[[Envelope], None] | ||||||
): | ||||||
# type: (...) -> None | ||||||
self._metric_buffer = [] # type: List[TraceMetric] | ||||||
self._capture_func = capture_func | ||||||
self._running = True | ||||||
self._lock = threading.Lock() | ||||||
|
||||||
self._flush_event = threading.Event() # type: threading.Event | ||||||
|
||||||
self._flusher = None # type: Optional[threading.Thread] | ||||||
self._flusher_pid = None # type: Optional[int] | ||||||
|
||||||
def _ensure_thread(self): | ||||||
# type: (...) -> bool | ||||||
if not self._running: | ||||||
return False | ||||||
|
||||||
pid = os.getpid() | ||||||
if self._flusher_pid == pid: | ||||||
return True | ||||||
|
||||||
with self._lock: | ||||||
if self._flusher_pid == pid: | ||||||
return True | ||||||
|
||||||
self._flusher_pid = pid | ||||||
|
||||||
self._flusher = threading.Thread(target=self._flush_loop) | ||||||
self._flusher.daemon = True | ||||||
|
||||||
try: | ||||||
self._flusher.start() | ||||||
except RuntimeError: | ||||||
self._running = False | ||||||
return False | ||||||
|
||||||
return True | ||||||
|
||||||
def _flush_loop(self): | ||||||
# type: (...) -> None | ||||||
while self._running: | ||||||
self._flush_event.wait(self.FLUSH_WAIT_TIME + random.random()) | ||||||
self._flush_event.clear() | ||||||
self._flush() | ||||||
|
||||||
def add( | ||||||
self, | ||||||
metric, # type: TraceMetric | ||||||
): | ||||||
# type: (...) -> None | ||||||
if not self._ensure_thread() or self._flusher is None: | ||||||
return None | ||||||
|
||||||
with self._lock: | ||||||
self._metric_buffer.append(metric) | ||||||
if len(self._metric_buffer) >= self.MAX_METRICS_BEFORE_FLUSH: | ||||||
self._flush_event.set() | ||||||
|
||||||
def kill(self): | ||||||
# type: (...) -> None | ||||||
if self._flusher is None: | ||||||
return | ||||||
|
||||||
self._running = False | ||||||
self._flush_event.set() | ||||||
self._flusher = None | ||||||
|
||||||
def flush(self): | ||||||
# type: (...) -> None | ||||||
self._flush() | ||||||
|
||||||
@staticmethod | ||||||
def _metric_to_transport_format(metric): | ||||||
# type: (TraceMetric) -> Any | ||||||
def format_attribute(val): | ||||||
# type: (int | float | str | bool) -> Any | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potential bug: The type hint
|
# type: (int | float | str | bool) -> Any | |
# type: Union[int, float, str, bool] -> Any |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -235,6 +235,32 @@ class SDKInfo(TypedDict): | |
}, | ||
) | ||
|
||
TraceMetricType = Literal["counter", "gauge", "distribution"] | ||
|
||
TraceMetricAttributeValue = TypedDict( | ||
"TraceMetricAttributeValue", | ||
{ | ||
"value": Union[str, bool, float, int], | ||
"type": Literal["string", "boolean", "double", "integer"], | ||
}, | ||
) | ||
|
||
TraceMetric = TypedDict( | ||
"TraceMetric", | ||
{ | ||
"timestamp": float, | ||
"trace_id": str, | ||
"span_id": Optional[str], | ||
"name": str, | ||
"type": TraceMetricType, | ||
"value": float, | ||
"unit": Optional[str], | ||
"attributes": dict[str, TraceMetricAttributeValue], | ||
}, | ||
) | ||
|
||
TraceMetricProcessor = Callable[[TraceMetric, Hint], Optional[TraceMetric]] | ||
|
||
# TODO: Make a proper type definition for this (PRs welcome!) | ||
Breadcrumb = Dict[str, Any] | ||
|
||
|
@@ -270,6 +296,7 @@ class SDKInfo(TypedDict): | |
"monitor", | ||
"span", | ||
"log_item", | ||
"trace_metric", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
] | ||
SessionStatus = Literal["ok", "exited", "crashed", "abnormal"] | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,9 @@ | |
is_gevent, | ||
logger, | ||
get_before_send_log, | ||
get_before_send_metric, | ||
has_logs_enabled, | ||
has_trace_metrics_enabled, | ||
) | ||
from sentry_sdk.serializer import serialize | ||
from sentry_sdk.tracing import trace | ||
|
@@ -184,6 +186,7 @@ def __init__(self, options=None): | |
self.monitor = None # type: Optional[Monitor] | ||
self.metrics_aggregator = None # type: Optional[MetricsAggregator] | ||
self.log_batcher = None # type: Optional[LogBatcher] | ||
self.trace_metrics_batcher = None # type: Optional[TraceMetricsBatcher] | ||
|
||
def __getstate__(self, *args, **kwargs): | ||
# type: (*Any, **Any) -> Any | ||
|
@@ -219,6 +222,10 @@ def _capture_experimental_log(self, log): | |
# type: (Log) -> None | ||
pass | ||
|
||
def _capture_trace_metric(self, metric): | ||
# type: (TraceMetric) -> None | ||
pass | ||
|
||
def capture_session(self, *args, **kwargs): | ||
# type: (*Any, **Any) -> None | ||
return None | ||
|
@@ -388,6 +395,13 @@ def _capture_envelope(envelope): | |
|
||
self.log_batcher = LogBatcher(capture_func=_capture_envelope) | ||
|
||
self.trace_metrics_batcher = None | ||
|
||
if has_trace_metrics_enabled(self.options): | ||
from sentry_sdk._trace_metrics_batcher import TraceMetricsBatcher | ||
|
||
self.trace_metrics_batcher = TraceMetricsBatcher(capture_func=_capture_envelope) | ||
|
||
max_request_body_size = ("always", "never", "small", "medium") | ||
if self.options["max_request_body_size"] not in max_request_body_size: | ||
raise ValueError( | ||
|
@@ -967,6 +981,55 @@ def _capture_experimental_log(self, log): | |
if self.log_batcher: | ||
self.log_batcher.add(log) | ||
|
||
def _capture_trace_metric(self, metric): | ||
# type: (Optional[TraceMetric]) -> None | ||
if not has_trace_metrics_enabled(self.options) or metric is None: | ||
return | ||
|
||
current_scope = sentry_sdk.get_current_scope() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we using this anywhere in the function? The analogous log function above uses the current scope for populating |
||
isolation_scope = sentry_sdk.get_isolation_scope() | ||
|
||
metric["attributes"]["sentry.sdk.name"] = SDK_INFO["name"] | ||
metric["attributes"]["sentry.sdk.version"] = SDK_INFO["version"] | ||
|
||
environment = self.options.get("environment") | ||
if environment is not None and "sentry.environment" not in metric["attributes"]: | ||
metric["attributes"]["sentry.environment"] = environment | ||
|
||
release = self.options.get("release") | ||
if release is not None and "sentry.release" not in metric["attributes"]: | ||
metric["attributes"]["sentry.release"] = release | ||
|
||
if isolation_scope._user is not None: | ||
for metric_attribute, user_attribute in ( | ||
("user.id", "id"), | ||
("user.name", "username"), | ||
("user.email", "email"), | ||
): | ||
if ( | ||
user_attribute in isolation_scope._user | ||
and metric_attribute not in metric["attributes"] | ||
): | ||
metric["attributes"][metric_attribute] = isolation_scope._user[ | ||
user_attribute | ||
] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: TraceMetric Attributes Type MismatchThe Additional Locations (1) |
||
|
||
debug = self.options.get("debug", False) | ||
if debug: | ||
logger.debug( | ||
f"[Sentry Metrics] [{metric.get('type')}] {metric.get('name')}: {metric.get('value')}" | ||
) | ||
|
||
before_send_metric = get_before_send_metric(self.options) | ||
if before_send_metric is not None: | ||
metric = before_send_metric(metric, {}) | ||
|
||
if metric is None: | ||
return | ||
|
||
if self.trace_metrics_batcher: | ||
self.trace_metrics_batcher.add(metric) | ||
|
||
def capture_session( | ||
self, | ||
session, # type: Session | ||
|
@@ -1023,6 +1086,8 @@ def close( | |
self.metrics_aggregator.kill() | ||
if self.log_batcher is not None: | ||
self.log_batcher.kill() | ||
if self.trace_metrics_batcher is not None: | ||
self.trace_metrics_batcher.kill() | ||
if self.monitor: | ||
self.monitor.kill() | ||
self.transport.kill() | ||
|
@@ -1049,6 +1114,8 @@ def flush( | |
self.metrics_aggregator.flush() | ||
if self.log_batcher is not None: | ||
self.log_batcher.flush() | ||
if self.trace_metrics_batcher is not None: | ||
self.trace_metrics_batcher.flush() | ||
self.transport.flush(timeout=timeout, callback=callback) | ||
|
||
def __enter__(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -285,6 +285,8 @@ def data_category(self): | |
return "error" | ||
elif ty == "log": | ||
return "log_item" | ||
elif ty == "trace_metric": | ||
return "trace_metric" | ||
Comment on lines
+288
to
+289
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment as above, if the rate limit category is actually called |
||
elif ty == "client_report": | ||
return "internal" | ||
elif ty == "profile": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should try to avoid circular imports. See the other comment I left. We also don't re-export many other modules. Can we remove the re-export?
To my knowledge the only downside is that it wouldn't be included in
from sentry_sdk import *
. If there's a good reason to keep it we should still try to avoid the circular import when importing at the top of a file.Note that most other modules are not re-exported here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's please not re-export here, in addition to what @alexander-alderman-webb says the feature is also experimental so let's keep it more hidden for now.