fix(eap): Handle nans in other timeseries for EAP (#82131)

Zylphrex · web-flow · commit 11e7079dd3db · 2024-12-17T09:30:33.000-05:00
This fixes the nans in the other timeseries for EAP when some numeric
attribute does not exist for a time bucket, the avg aggregate will
produce a nan. It does not fix it for the spans indexed dataset because
the query there is actually wrong and treats entries without the
attribute as 0. It also does not fix it for the rpc calls because rpc
calls are failing with a division by 0 error.
diff --git a/src/sentry/snuba/spans_eap.py b/src/sentry/snuba/spans_eap.py
@@ -1,6 +1,7 @@
 import logging
 from collections.abc import Mapping, Sequence
 from datetime import timedelta
+from typing import Any, TypedDict
 
 import sentry_sdk
 from snuba_sdk import Column, Condition
@@ -244,14 +245,16 @@ def top_events_timeseries(
             snuba_params.end_date,
             rollup,
         )
+
     with sentry_sdk.start_span(op="spans_indexed", name="top_events.transform_results") as span:
         span.set_data("result_count", len(result.get("data", [])))
         result = top_events_builder.process_results(result)
+        other_result = top_events_builder.process_results(other_result)
 
         issues: Mapping[int, str | None] = {}
         translated_groupby = top_events_builder.translated_groupby
 
-        results = (
+        results: dict[str, TimeseriesResult] = (
             {discover.OTHER_KEY: {"order": limit, "data": other_result["data"]}}
             if len(other_result.get("data", []))
             else {}
@@ -292,3 +295,8 @@ def top_events_timeseries(
             )
 
     return top_events_results
+
+
+class TimeseriesResult(TypedDict):
+    order: int
+    data: list[dict[str, Any]]
diff --git a/tests/snuba/api/endpoints/test_organization_events_stats_span_indexed.py b/tests/snuba/api/endpoints/test_organization_events_stats_span_indexed.py
@@ -90,6 +90,61 @@ def test_handle_nans_from_snuba(self):
         )
         assert response.status_code == 200, response.content
 
+    def test_handle_nans_from_snuba_top_n(self):
+        self.store_spans(
+            [
+                self.create_span(
+                    {
+                        "description": "foo",
+                        "measurements": {"lcp": {"value": 1}},
+                    },
+                    start_ts=self.day_ago,
+                ),
+                self.create_span({"description": "foo"}, start_ts=self.day_ago),
+                self.create_span({"description": "foo"}, start_ts=self.two_days_ago),
+                self.create_span(
+                    {
+                        "description": "bar",
+                        "measurements": {"lcp": {"value": 2}},
+                    },
+                    start_ts=self.day_ago,
+                ),
+                self.create_span({"description": "bar"}, start_ts=self.day_ago),
+                self.create_span({"description": "bar"}, start_ts=self.two_days_ago),
+            ],
+            is_eap=self.is_eap,
+        )
+
+        response = self._do_request(
+            data={
+                "field": ["span.description", "p50(measurements.lcp)", "avg(measurements.lcp)"],
+                "yAxis": ["p50(measurements.lcp)", "avg(measurements.lcp)"],
+                "project": self.project.id,
+                "dataset": self.dataset,
+                "excludeOther": 0,
+                "topEvents": 1,
+                "partial": 1,
+                "per_page": 50,
+                "interval": "1d",
+                "statsPeriod": "7d",
+                "orderby": "-avg_measurements_lcp",
+                "sort": "-avg_measurements_lcp",
+            },
+        )
+        assert response.status_code == 200, response.content
+
+        # We cannot actually assert the value because the `spans_indexed` is
+        # producing the wrong result and treating missing values as 0s which
+        # skews the final aggregation.
+        # This is also the reason it never errored because snuba never returns
+        # nans in this situation.
+        #
+        # for k in response.data:
+        #     for agg in ["p50(measurements.lcp)", "avg(measurements.lcp)"]:
+        #         for row in response.data[k][agg]["data"]:
+        #             assert row[1][0]["count"] in {0, 1, 2}
+        # assert response.data["Other"]
+
     def test_count_unique(self):
         event_counts = [6, 0, 6, 3, 0, 3]
         for hour, count in enumerate(event_counts):
@@ -857,3 +912,7 @@ def test_invalid_intervals(self):
             },
         )
         assert response.status_code == 400, response.content
+
+    @pytest.mark.xfail(reason="division by 0 error in snuba")
+    def test_handle_nans_from_snuba_top_n(self):
+        super().test_handle_nans_from_snuba_top_n()