getsentry
diff --git a/‎snuba/settings/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎snuba/settings/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎snuba/web/rpc/v1/endpoint_time_series.py‎
Lines changed: 3 additions & 1 deletion b/‎snuba/web/rpc/v1/endpoint_time_series.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎snuba/web/rpc/v1/endpoint_trace_item_table.py‎
Lines changed: 20 additions & 2 deletions b/‎snuba/web/rpc/v1/endpoint_trace_item_table.py‎
Lines changed: 20 additions & 2 deletions
diff --git a/‎snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py‎
Lines changed: 31 additions & 4 deletions b/‎snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py‎
Lines changed: 31 additions & 4 deletions
diff --git a/‎snuba/web/rpc/v1/resolvers/common/trace_item_table.py‎
Lines changed: 134 additions & 22 deletions b/‎snuba/web/rpc/v1/resolvers/common/trace_item_table.py‎
Lines changed: 134 additions & 22 deletions
@@ -458,6 +458,10 @@ class RedisClusters(TypedDict):
 MAX_ONGOING_MUTATIONS_FOR_DELETE = 5
 SNQL_DISABLED_DATASETS: set[str] = set([])
 
+# Fallback default for the `enable_formula_reliability` flag. It is only used when
+# no value is set in runtime config (snuba admin), which is read via get_int_config.
+ENABLE_FORMULA_RELIABILITY_DEFAULT = 0
+
 
 def _load_settings(obj: MutableMapping[str, Any] = locals()) -> None:
     """Load settings from the path provided in the SNUBA_SETTINGS environment
 
@@ -16,7 +16,9 @@
     TimeSeriesRequestWrapper,
 )
 from snuba.web.rpc.v1.resolvers import ResolverTimeSeries
-from snuba.web.rpc.v1.visitors.visitor_v2 import preprocess_expression_labels
+from snuba.web.rpc.v1.visitors.time_series_request_visitor import (
+    preprocess_expression_labels,
+)
 
 _VALID_GRANULARITY_SECS = set(
     [
 
@@ -8,6 +8,8 @@
 )
 from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
 
+from snuba.settings import ENABLE_FORMULA_RELIABILITY_DEFAULT
+from snuba.state import get_int_config
 from snuba.web.rpc import RPCEndpoint, TraceItemDataResolver
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
 from snuba.web.rpc.proto_visitor import (
@@ -19,7 +21,14 @@
 from snuba.web.rpc.v1.visitors.sparse_aggregate_attribute_transformer import (
     SparseAggregateAttributeTransformer,
 )
-from snuba.web.rpc.v1.visitors.visitor_v2 import RejectTimestampAsStringVisitor
+from snuba.web.rpc.v1.visitors.time_series_request_visitor import (
+    RejectTimestampAsStringVisitor,
+)
+from snuba.web.rpc.v1.visitors.trace_item_table_request_visitor import (
+    NormalizeFormulaLabelsVisitor,
+    SetAggregateLabelsVisitor,
+    SetColumnLabelsVisitor,
+)
 
 _GROUP_BY_DISALLOWED_COLUMNS = ["timestamp"]
 
@@ -87,7 +96,16 @@ def _transform_request(request: TraceItemTableRequest) -> TraceItemTableRequest:
     This function is for initial processing and transformation of the request after recieving it.
     It is similar to the query processor step of the snql pipeline.
     """
-    return SparseAggregateAttributeTransformer(request).transform()
+    request = SparseAggregateAttributeTransformer(request).transform()
+    if get_int_config("enable_formula_reliability", ENABLE_FORMULA_RELIABILITY_DEFAULT):
+        # TODO: replace SetColumnLabelsVisitor with ValidateColumnLabelsVisitor currently blocked
+        # by sentry integration tests
+        SetColumnLabelsVisitor().visit(request)
+        # SetAggregateLabelsVisitor should come after ValidateColumnLabelsVisitor because it
+        # relies on the labels in the columns being set.
+        SetAggregateLabelsVisitor().visit(request)
+        NormalizeFormulaLabelsVisitor().visit(request)
+    return request
 
 
 class EndpointTraceItemTable(
 
@@ -34,6 +34,8 @@
 from snuba.query.logical import Query
 from snuba.query.query_settings import HTTPQuerySettings
 from snuba.request import Request as SnubaRequest
+from snuba.settings import ENABLE_FORMULA_RELIABILITY_DEFAULT
+from snuba.state import get_int_config
 from snuba.web.query import run_query
 from snuba.web.rpc.common.common import (
     add_existence_check_to_subscriptable_references,
@@ -195,7 +197,31 @@ def _get_reliability_context_columns(
     """
     extrapolated aggregates need to request extra columns to calculate the reliability of the result.
     this function returns the list of columns that need to be requested.
+
+    If alias_prefix is provided, it will be prepended to the alias of the returned columns.
     """
+
+    if column.HasField("formula"):
+        if not get_int_config(
+            "enable_formula_reliability", ENABLE_FORMULA_RELIABILITY_DEFAULT
+        ):
+            return []
+        # also query for the left and right parts of the formula separately
+        # this will be used later to calculate the reliability of the formula
+        # ex: SELECT agg1/agg2 will become SELECT agg1/agg2, agg1, agg2
+        context_cols = []
+        for col in [column.formula.left, column.formula.right]:
+            if not col.HasField("formula"):
+                context_cols.append(
+                    SelectedExpression(
+                        name=col.label,
+                        expression=_column_to_expression(col, request_meta),
+                    )
+                )
+            context_cols.extend(_get_reliability_context_columns(col, request_meta))
+
+        return context_cols
+
     if not (column.HasField("conditional_aggregation")):
         return []
 
@@ -220,16 +246,17 @@ def _get_reliability_context_columns(
             column.conditional_aggregation,
             attribute_key_to_expression_eap_items,
         )
-        count_column = get_count_column(
-            column.conditional_aggregation,
-            attribute_key_to_expression_eap_items,
-        )
         context_columns.append(
             SelectedExpression(
                 name=average_sample_rate_column.alias,
                 expression=average_sample_rate_column,
             )
         )
+
+        count_column = get_count_column(
+            column.conditional_aggregation,
+            attribute_key_to_expression_eap_items,
+        )
         context_columns.append(
             SelectedExpression(name=count_column.alias, expression=count_column)
         )
 
@@ -1,41 +1,134 @@
+import re
 from collections import defaultdict
 from typing import Any, Callable, Dict, Iterable
 
 from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import (
+    Column,
     TraceItemColumnValues,
     TraceItemTableRequest,
 )
-from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue
+from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
+    AttributeKey,
+    AttributeValue,
+    Reliability,
+)
 
+from snuba.settings import ENABLE_FORMULA_RELIABILITY_DEFAULT
+from snuba.state import get_int_config
 from snuba.web.rpc.common.exceptions import BadSnubaRPCRequestException
 from snuba.web.rpc.v1.resolvers.common.aggregation import ExtrapolationContext
 
 
-def convert_results(
-    request: TraceItemTableRequest, data: Iterable[Dict[str, Any]]
-) -> list[TraceItemColumnValues]:
-    converters: Dict[str, Callable[[Any], AttributeValue]] = {}
-
-    for column in request.columns:
-        if column.HasField("key"):
-            if column.key.type == AttributeKey.TYPE_BOOLEAN:
-                converters[column.label] = lambda x: AttributeValue(val_bool=bool(x))
-            elif column.key.type == AttributeKey.TYPE_STRING:
-                converters[column.label] = lambda x: AttributeValue(val_str=str(x))
-            elif column.key.type == AttributeKey.TYPE_INT:
-                converters[column.label] = lambda x: AttributeValue(val_int=int(x))
-            elif column.key.type == AttributeKey.TYPE_FLOAT:
-                converters[column.label] = lambda x: AttributeValue(val_float=float(x))
-            elif column.key.type == AttributeKey.TYPE_DOUBLE:
-                converters[column.label] = lambda x: AttributeValue(val_double=float(x))
-        elif column.HasField("conditional_aggregation"):
-            converters[column.label] = lambda x: AttributeValue(val_double=float(x))
-        elif column.HasField("formula"):
+def _add_converter(
+    column: Column, converters: Dict[str, Callable[[Any], AttributeValue]]
+) -> None:
+    """
+    Register in `converters` (keyed by column label) the function that turns a raw
+    result value for `column` into an AttributeValue.
+
+    - attribute (`key`) columns convert according to the attribute key type
+    - conditional aggregation, formula and literal columns convert to val_double
+    - when the `enable_formula_reliability` config is on, converters are also
+      registered recursively for the left and right parts of a formula
+
+    Mutates `converters` in place and returns nothing.
+
+    Raises BadSnubaRPCRequestException if the attribute key type is not handled,
+    or if the column has none of the handled fields set.
+    """
+    if column.HasField("key"):
+        if column.key.type == AttributeKey.TYPE_BOOLEAN:
+            converters[column.label] = lambda x: AttributeValue(val_bool=bool(x))
+        elif column.key.type == AttributeKey.TYPE_STRING:
+            converters[column.label] = lambda x: AttributeValue(val_str=str(x))
+        elif column.key.type == AttributeKey.TYPE_INT:
+            converters[column.label] = lambda x: AttributeValue(val_int=int(x))
+        elif column.key.type == AttributeKey.TYPE_FLOAT:
+            converters[column.label] = lambda x: AttributeValue(val_float=float(x))
+        elif column.key.type == AttributeKey.TYPE_DOUBLE:
             converters[column.label] = lambda x: AttributeValue(val_double=float(x))
         else:
             raise BadSnubaRPCRequestException(
-                "column is not one of: attribute, (conditional) aggregation, or formula"
+                f"unknown attribute type: {AttributeKey.Type.Name(column.key.type)}"
             )
+    elif column.HasField("conditional_aggregation"):
+        converters[column.label] = lambda x: AttributeValue(val_double=float(x))
+    elif column.HasField("formula"):
+        converters[column.label] = lambda x: AttributeValue(val_double=float(x))
+        if get_int_config(
+            "enable_formula_reliability", ENABLE_FORMULA_RELIABILITY_DEFAULT
+        ):
+            # the parts of the formula are selected as extra result columns, so
+            # they need converters too
+            _add_converter(column.formula.left, converters)
+            _add_converter(column.formula.right, converters)
+    elif column.HasField("literal"):
+        converters[column.label] = lambda x: AttributeValue(val_double=float(x))
+    else:
+        # keep this list in sync with the branches handled above
+        raise BadSnubaRPCRequestException(
+            "column is not one of: attribute, (conditional) aggregation, formula, or literal"
+        )
+
+
+def get_converters_for_columns(
+    columns: Iterable[Column],
+) -> Dict[str, Callable[[Any], AttributeValue]]:
+    """
+    Build the mapping of column label -> converter for the given columns.
+
+    A converter is a function that takes a value returned by a clickhouse query
+    and wraps it in an AttributeValue.
+    """
+    label_to_converter: Dict[str, Callable[[Any], AttributeValue]] = {}
+    for requested_column in columns:
+        # _add_converter fills the mapping in place
+        _add_converter(requested_column, label_to_converter)
+    return label_to_converter
+
+
+def _is_sub_column(result_column_name: str, column: Column) -> bool:
+    """
+    Tell whether result_column_name names a sub column of column.
+
+    A sub column name is the column's label followed by one or more ".left" or
+    ".right" segments, e.g. "agg.left" or "agg.right.left" for the label "agg".
+    """
+    # NOTE: a label of this shape passed by the user on a column that is not a
+    # sub column would also match. for now, we assume that the user will not do this.
+    sub_column_pattern = rf"{re.escape(column.label)}(\.left|\.right)+"
+    return re.fullmatch(sub_column_pattern, result_column_name) is not None
+
+
+def _get_reliabilities_for_formula(
+    column: Column, res: Dict[str, TraceItemColumnValues]
+) -> list[Reliability.ValueType]:
+    """
+    Compute and return the reliabilities for the given formula column,
+    based on the reliabilities of its sub columns (the .left and .right parts).
+
+    Ex:
+    When users send a request with a formula such as sum(B)/min(B),
+    we also query for sum(B) and min(B) separately (earlier in the codebase).
+    Thus, we already have the reliabilities for sum(B), min(B) in res, under labels
+    ending in .left and .right. We use them in this function to compute the
+    reliability of the formula, based on the following:
+    a formula is reliable iff all of its parts are reliable (.left and .right)
+    ex: (agg1 + agg2) / agg3 * agg4 is reliable iff agg1, agg2, agg3, agg4 are reliable.
+
+    Select A, sum(B)/min(B) AS agg GROUP BY A
+    +----+--------------+----------+--------------+
+    | A  |     agg      | agg.left |  agg.right   |
+    +----+--------------+----------+--------------+
+    | A1 | reliable     | reliable | reliable     |
+    | A2 | not reliable | reliable | not reliable |
+    | A3 | reliable     | reliable | reliable     |
+    +----+--------------+----------+--------------+
+    Each column has one reliability per group, and the reliabilities of agg are
+    determined from those of agg.left and agg.right. In this case the function
+    would return [reliable, not reliable, reliable]
+
+    Returns one entry per row; the list is empty when res has no sub columns of
+    `column` with reliabilities.
+
+    Raises ValueError if any sub column carries a reliability that is not one of
+    the known Reliability values.
+    """
+
+    valid_reliabilities = (
+        Reliability.RELIABILITY_UNSPECIFIED,
+        Reliability.RELIABILITY_LOW,
+        Reliability.RELIABILITY_HIGH,
+    )
+
+    reliable_so_far: list[Reliability.ValueType] = []
+    for resname, resvalue in res.items():
+        if not _is_sub_column(resname, column):
+            continue
+        for i, reliability in enumerate(resvalue.reliabilities):
+            # validate every value, including the ones from the first sub column
+            # (previously those were appended without being checked)
+            if reliability not in valid_reliabilities:
+                raise ValueError(f"Invalid reliability: {reliability}")
+            if len(reliable_so_far) <= i:
+                # bc we are extending as we go, it should only ever be 1 behind
+                assert i == len(reliable_so_far)
+                reliable_so_far.append(reliability)
+            else:
+                # keep the smaller enum value for this row; this assumes lower
+                # Reliability values mean "less reliable" - TODO confirm against
+                # the Reliability proto definition
+                reliable_so_far[i] = min(reliable_so_far[i], reliability)
+    return reliable_so_far
+
+
+def convert_results(
+    request: TraceItemTableRequest, data: Iterable[Dict[str, Any]]
+) -> list[TraceItemColumnValues]:
+    converters = get_converters_for_columns(request.columns)
 
     res: defaultdict[str, TraceItemColumnValues] = defaultdict(TraceItemColumnValues)
     for row in data:
@@ -53,6 +146,25 @@ def convert_results(
                         extrapolation_context.reliability
                     )
 
+    if get_int_config("enable_formula_reliability", ENABLE_FORMULA_RELIABILITY_DEFAULT):
+        # add formula reliabilities, remove the left and right parts
+        for column in request.columns:
+            if column.HasField("formula") and column.label in res:
+                # compute the reliabilities for the formula
+                reliabilities = _get_reliabilities_for_formula(column, res)
+                # set the reliabilities of the formula to be the ones we calculated
+                while len(res[column.label].reliabilities) > 0:
+                    res[column.label].reliabilities.pop()
+                for e in reliabilities:
+                    assert e is not None
+                    res[column.label].reliabilities.append(e)
+
+        # remove any columns that were not explicitly requested by the user in the request
+        requested_column_labels = set(e.label for e in request.columns)
+        to_delete = list(filter(lambda k: k not in requested_column_labels, res.keys()))
+        for name in to_delete:
+            del res[name]
+
     column_ordering = {column.label: i for i, column in enumerate(request.columns)}
 
     return list(
Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,9 @@`
`16`	`16`	`TimeSeriesRequestWrapper,`
`17`	`17`	`)`
`18`	`18`	`from snuba.web.rpc.v1.resolvers import ResolverTimeSeries`
`19`		`-from snuba.web.rpc.v1.visitors.visitor_v2 import preprocess_expression_labels`
	`19`	`+from snuba.web.rpc.v1.visitors.time_series_request_visitor import (`
	`20`	`+ preprocess_expression_labels,`
	`21`	`+)`
`20`	`22`
`21`	`23`	`_VALID_GRANULARITY_SECS = set(`
`22`	`24`	`[`