feat(rpc): Default extrapolated for snql (#80830)

wmak · web-flow · commit 216450cc1d6e · 2024-11-15T13:58:14.000-05:00
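- Make the applicable aggregate functions extrapolated by default in the
SnQL version of EAP: the former `*_weighted` functions take over the plain
names (`count`, `sum`, `avg`, `percentile`, `p50` through `p100`), and the
previous non-extrapolated `count`, `sum`, `avg` and `p50` through `p100` are
renamed with a `_sample` suffix. `count_unique_weighted`, `min_weighted` and
`max_weighted` are removed.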
diff --git a/src/sentry/search/events/datasets/spans_indexed.py b/src/sentry/search/events/datasets/spans_indexed.py
@@ -624,7 +624,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     default_result_type="rate",
                 ),
                 SnQLFunction(
-                    "count",
+                    "count_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -638,14 +638,14 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     default_result_type="integer",
                 ),
                 SnQLFunction(
-                    "sum",
+                    "sum_sample",
                     required_args=[NumericColumn("column", spans=True)],
                     snql_aggregate=self._resolve_aggregate_if("sum"),
                     result_type_fn=self.reflective_result_type(),
                     default_result_type="duration",
                 ),
                 SnQLFunction(
-                    "avg",
+                    "avg_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -655,7 +655,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p50",
+                    "p50_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -665,7 +665,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p75",
+                    "p75_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -675,7 +675,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p90",
+                    "p90_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -685,7 +685,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p95",
+                    "p95_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -695,7 +695,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p99",
+                    "p99_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -705,7 +705,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p100",
+                    "p100_sample",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -731,28 +731,22 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "count_weighted",
+                    "count",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
                     snql_aggregate=self._resolve_count_weighted,
                     default_result_type="integer",
                 ),
                 SnQLFunction(
-                    "count_unique_weighted",
-                    required_args=[ColumnTagArg("column")],
-                    snql_aggregate=self._resolve_aggregate_if("uniq"),
-                    default_result_type="integer",
-                ),
-                SnQLFunction(
-                    "sum_weighted",
+                    "sum",
                     required_args=[NumericColumn("column", spans=True)],
                     result_type_fn=self.reflective_result_type(),
                     snql_aggregate=lambda args, alias: self._resolve_sum_weighted(args, alias),
                     default_result_type="duration",
                 ),
                 SnQLFunction(
-                    "avg_weighted",
+                    "avg",
                     required_args=[NumericColumn("column", spans=True)],
                     result_type_fn=self.reflective_result_type(),
                     snql_aggregate=lambda args, alias: Function(
@@ -766,7 +760,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     default_result_type="duration",
                 ),
                 SnQLFunction(
-                    "percentile_weighted",
+                    "percentile",
                     required_args=[
                         NumericColumn("column", spans=True),
                         NumberRange("percentile", 0, 1),
@@ -777,7 +771,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p50_weighted",
+                    "p50",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -789,7 +783,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p75_weighted",
+                    "p75",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -801,7 +795,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p90_weighted",
+                    "p90",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -813,7 +807,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p95_weighted",
+                    "p95",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -825,7 +819,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p99_weighted",
+                    "p99",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -837,7 +831,7 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "p100_weighted",
+                    "p100",
                     optional_args=[
                         with_default("span.duration", NumericColumn("column", spans=True)),
                     ],
@@ -848,23 +842,6 @@ def function_converter(self) -> dict[str, SnQLFunction]:
                     default_result_type="duration",
                     redundant_grouping=True,
                 ),
-                # Min and Max are identical to their existing implementations
-                SnQLFunction(
-                    "min_weighted",
-                    required_args=[NumericColumn("column", spans=True)],
-                    snql_aggregate=self._resolve_aggregate_if("min"),
-                    result_type_fn=self.reflective_result_type(),
-                    default_result_type="duration",
-                    redundant_grouping=True,
-                ),
-                SnQLFunction(
-                    "max_weighted",
-                    required_args=[NumericColumn("column", spans=True)],
-                    snql_aggregate=self._resolve_aggregate_if("max"),
-                    result_type_fn=self.reflective_result_type(),
-                    default_result_type="duration",
-                    redundant_grouping=True,
-                ),
                 SnQLFunction(
                     "margin_of_error",
                     optional_args=[with_default("fpc", SnQLStringArg("fpc"))],
@@ -1060,14 +1037,14 @@ def _query_total_counts(self) -> tuple[float | int, float | int]:
                 dataset=self.builder.dataset,
                 params={},
                 snuba_params=self.builder.params,
-                selected_columns=["count()", "count_weighted()"],
+                selected_columns=["count_sample()", "count()"],
             )
             total_results = total_query.run_query(Referrer.API_SPANS_TOTAL_COUNT_FIELD.value)
             results = total_query.process_results(total_results)
             if len(results["data"]) != 1:
                 raise Exception("Could not query population size")
-            self._cached_count = results["data"][0]["count"]
-            self._cached_count_weighted = results["data"][0]["count_weighted"]
+            self._cached_count = results["data"][0]["count_sample"]
+            self._cached_count_weighted = results["data"][0]["count"]
         return self._cached_count, self._cached_count_weighted
 
     @cached_property
diff --git a/tests/snuba/api/endpoints/test_organization_events_span_indexed.py b/tests/snuba/api/endpoints/test_organization_events_span_indexed.py
@@ -690,21 +690,21 @@ def test_aggregate_numeric_attr_weighted(self):
             {
                 "field": [
                     "description",
-                    "count_unique_weighted(bar)",
-                    "count_unique_weighted(tags[bar])",
-                    "count_unique_weighted(tags[bar,string])",
-                    "count_weighted()",
-                    "count_weighted(span.duration)",
-                    "count_weighted(tags[foo,     number])",
-                    "sum_weighted(tags[foo,number])",
-                    "avg_weighted(tags[foo,number])",
-                    "p50_weighted(tags[foo,number])",
-                    "p75_weighted(tags[foo,number])",
-                    "p95_weighted(tags[foo,number])",
-                    "p99_weighted(tags[foo,number])",
-                    "p100_weighted(tags[foo,number])",
-                    "min_weighted(tags[foo,number])",
-                    "max_weighted(tags[foo,number])",
+                    "count_unique(bar)",
+                    "count_unique(tags[bar])",
+                    "count_unique(tags[bar,string])",
+                    "count()",
+                    "count(span.duration)",
+                    "count(tags[foo,     number])",
+                    "sum(tags[foo,number])",
+                    "avg(tags[foo,number])",
+                    "p50(tags[foo,number])",
+                    "p75(tags[foo,number])",
+                    "p95(tags[foo,number])",
+                    "p99(tags[foo,number])",
+                    "p100(tags[foo,number])",
+                    "min(tags[foo,number])",
+                    "max(tags[foo,number])",
                 ],
                 "query": "",
                 "orderby": "description",
@@ -718,21 +718,21 @@ def test_aggregate_numeric_attr_weighted(self):
         data = response.data["data"]
         assert data[0] == {
             "description": "foo",
-            "count_unique_weighted(bar)": 3,
-            "count_unique_weighted(tags[bar])": 3,
-            "count_unique_weighted(tags[bar,string])": 3,
-            "count_weighted()": 3,
-            "count_weighted(span.duration)": 3,
-            "count_weighted(tags[foo,     number])": 1,
-            "sum_weighted(tags[foo,number])": 5.0,
-            "avg_weighted(tags[foo,number])": 5.0,
-            "p50_weighted(tags[foo,number])": 5.0,
-            "p75_weighted(tags[foo,number])": 5.0,
-            "p95_weighted(tags[foo,number])": 5.0,
-            "p99_weighted(tags[foo,number])": 5.0,
-            "p100_weighted(tags[foo,number])": 5.0,
-            "min_weighted(tags[foo,number])": 5.0,
-            "max_weighted(tags[foo,number])": 5.0,
+            "count_unique(bar)": 3,
+            "count_unique(tags[bar])": 3,
+            "count_unique(tags[bar,string])": 3,
+            "count()": 3,
+            "count(span.duration)": 3,
+            "count(tags[foo,     number])": 1,
+            "sum(tags[foo,number])": 5.0,
+            "avg(tags[foo,number])": 5.0,
+            "p50(tags[foo,number])": 5.0,
+            "p75(tags[foo,number])": 5.0,
+            "p95(tags[foo,number])": 5.0,
+            "p99(tags[foo,number])": 5.0,
+            "p100(tags[foo,number])": 5.0,
+            "min(tags[foo,number])": 5.0,
+            "max(tags[foo,number])": 5.0,
         }
 
     def test_numeric_attr_without_space(self):
@@ -1016,7 +1016,7 @@ def test_margin_of_error(self):
                     "margin_of_error()",
                     "lower_count_limit()",
                     "upper_count_limit()",
-                    "count_weighted()",
+                    "count()",
                 ],
                 "query": "description:foo",
                 "project": self.project.id,
@@ -1029,7 +1029,7 @@ def test_margin_of_error(self):
         margin_of_error = data["margin_of_error()"]
         lower_limit = data["lower_count_limit()"]
         upper_limit = data["upper_count_limit()"]
-        extrapolated = data["count_weighted()"]
+        extrapolated = data["count()"]
         assert margin_of_error == pytest.approx(0.306, rel=1e-1)
         # How to read this; these results mean that the extrapolated count is
         # 500k, with a lower estimated bound of ~200k, and an upper bound of 800k