fix(eap): reliabilities should be updated even when value is null (#6863)

davidtsuk · web-flow · commit 395db9c1b191 · 2025-02-10T11:19:19.000-08:00
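The reliabilities array has to be the same length as the results array.
In the previous implementation, their lengths could differ if some of
the aggregations returned null, because a reliability was only appended
in the non-null branch. Prior to the nullity overhaul, this wasn't a
concern because an aggregation could never be null. A reliability is now
appended for every extrapolated column regardless of nullity, and
is_extrapolated is set when the confidence interval column is present
in the row rather than inferred from its value being non-None.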
diff --git a/snuba/web/rpc/v1/resolvers/common/aggregation.py b/snuba/web/rpc/v1/resolvers/common/aggregation.py
@@ -43,16 +43,12 @@ class ExtrapolationContext(ABC):
     confidence_interval: Any
     average_sample_rate: float
     sample_count: int
+    is_extrapolated: bool
 
     @property
     def is_data_present(self) -> bool:
         return self.sample_count > 0
 
-    @property
-    @abstractmethod
-    def is_extrapolated(self) -> bool:
-        raise NotImplementedError
-
     @property
     @abstractmethod
     def reliability(self) -> Reliability.ValueType:
@@ -64,6 +60,7 @@ def from_row(
         row_data: Dict[str, Any],
     ) -> ExtrapolationContext:
         value = row_data[column_label]
+        is_extrapolated = False
 
         confidence_interval = None
         average_sample_rate = 0
@@ -88,6 +85,7 @@ def from_row(
                 continue
 
             if custom_column_information.custom_column_id == "confidence_interval":
+                is_extrapolated = True
                 confidence_interval = col_value
 
                 is_percentile = custom_column_information.metadata.get(
@@ -116,25 +114,20 @@ def from_row(
                 percentile=percentile,
                 granularity=granularity,
                 width=width,
+                is_extrapolated=is_extrapolated,
             )
 
         return GenericExtrapolationContext(
             value=value,
             confidence_interval=confidence_interval,
             average_sample_rate=average_sample_rate,
             sample_count=sample_count,
+            is_extrapolated=is_extrapolated,
         )
 
 
 @dataclass(frozen=True)
 class GenericExtrapolationContext(ExtrapolationContext):
-    @property
-    def is_extrapolated(self) -> bool:
-        # We infer if a column is extrapolated or not by the presence of the
-        # confidence interval. It will be present for extrapolated aggregates
-        # but not for non-extrapolated aggregates and scalars.
-        return self.confidence_interval is not None
-
     @cached_property
     def reliability(self) -> Reliability.ValueType:
         if not self.is_extrapolated or not self.is_data_present:
@@ -161,13 +154,6 @@ class PercentileExtrapolationContext(ExtrapolationContext):
     granularity: float
     width: float
 
-    @property
-    def is_extrapolated(self) -> bool:
-        # We infer if a column is extrapolated or not by the presence of the
-        # confidence interval. It will be present for extrapolated aggregates
-        # but not for non-extrapolated aggregates and scalars.
-        return self.confidence_interval is not None
-
     @cached_property
     def reliability(self) -> Reliability.ValueType:
         if not self.is_extrapolated or not self.is_data_present:
diff --git a/snuba/web/rpc/v1/resolvers/common/trace_item_table.py b/snuba/web/rpc/v1/resolvers/common/trace_item_table.py
@@ -44,14 +44,14 @@ def convert_results(
                 extrapolation_context = ExtrapolationContext.from_row(column_name, row)
                 res[column_name].attribute_name = column_name
                 if value is None:
-
                     res[column_name].results.append(AttributeValue(is_null=True))
                 else:
                     res[column_name].results.append(converters[column_name](value))
-                    if extrapolation_context.is_extrapolated:
-                        res[column_name].reliabilities.append(
-                            extrapolation_context.reliability
-                        )
+
+                if extrapolation_context.is_extrapolated:
+                    res[column_name].reliabilities.append(
+                        extrapolation_context.reliability
+                    )
 
     column_ordering = {column.label: i for i, column in enumerate(request.columns)}
 
diff --git a/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table_extrapolation.py b/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table_extrapolation.py
@@ -784,3 +784,105 @@ def test_formula(self) -> None:
                 ],
             ),
         ]
+
+    def test_aggregation_with_nulls(self) -> None:
+        spans_storage = get_storage(StorageKey("eap_spans"))
+        start = BASE_TIME
+        messages_a = [
+            gen_message(
+                start - timedelta(minutes=i),
+                measurements={
+                    "custom_measurement": {"value": 1},
+                    "server_sample_rate": {"value": 1.0},
+                },
+                tags={"custom_tag": "a"},
+            )
+            for i in range(5)
+        ]
+        messages_b = [
+            gen_message(
+                start - timedelta(minutes=i),
+                measurements={
+                    "custom_measurement2": {"value": 1},
+                    "server_sample_rate": {"value": 1.0},
+                },
+                tags={"custom_tag": "b"},
+            )
+            for i in range(5)
+        ]
+        write_raw_unprocessed_events(spans_storage, messages_a + messages_b)  # type: ignore
+
+        ts = Timestamp(seconds=int(BASE_TIME.timestamp()))
+        hour_ago = int((BASE_TIME - timedelta(hours=1)).timestamp())
+        message = TraceItemTableRequest(
+            meta=RequestMeta(
+                project_ids=[1],
+                organization_id=1,
+                cogs_category="something",
+                referrer="something",
+                start_timestamp=Timestamp(seconds=hour_ago),
+                end_timestamp=ts,
+                trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN,
+            ),
+            columns=[
+                Column(
+                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="custom_tag")
+                ),
+                Column(
+                    aggregation=AttributeAggregation(
+                        aggregate=Function.FUNCTION_SUM,
+                        key=AttributeKey(
+                            type=AttributeKey.TYPE_DOUBLE, name="custom_measurement"
+                        ),
+                        label="sum(custom_measurement)",
+                        extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_SAMPLE_WEIGHTED,
+                    )
+                ),
+                Column(
+                    aggregation=AttributeAggregation(
+                        aggregate=Function.FUNCTION_SUM,
+                        key=AttributeKey(
+                            type=AttributeKey.TYPE_DOUBLE, name="custom_measurement2"
+                        ),
+                        label="sum(custom_measurement2)",
+                        extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_SAMPLE_WEIGHTED,
+                    )
+                ),
+            ],
+            group_by=[
+                AttributeKey(type=AttributeKey.TYPE_STRING, name="custom_tag"),
+            ],
+            order_by=[
+                TraceItemTableRequest.OrderBy(
+                    column=Column(
+                        key=AttributeKey(
+                            type=AttributeKey.TYPE_STRING, name="custom_tag"
+                        )
+                    ),
+                ),
+            ],
+            limit=5,
+        )
+        response = EndpointTraceItemTable().execute(message)
+        assert response.column_values == [
+            TraceItemColumnValues(
+                attribute_name="custom_tag",
+                results=[AttributeValue(val_str="a"), AttributeValue(val_str="b")],
+            ),
+            TraceItemColumnValues(
+                attribute_name="sum(custom_measurement)",
+                results=[AttributeValue(val_double=5), AttributeValue(is_null=True)],
+                reliabilities=[
+                    Reliability.RELIABILITY_LOW,
+                    Reliability.RELIABILITY_UNSPECIFIED,
+                ],
+            ),
+            TraceItemColumnValues(
+                attribute_name="sum(custom_measurement2)",
+                results=[AttributeValue(is_null=True), AttributeValue(val_double=5)],
+                reliabilities=[
+                    Reliability.RELIABILITY_UNSPECIFIED,
+                    Reliability.RELIABILITY_LOW,
+                ],
+            ),
+        ]