getsentry · thetruecpaul · Dec 12, 2025 · Dec 12, 2025 · Dec 12, 2025
diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py
@@ -3681,6 +3681,26 @@
     flags=FLAG_AUTOMATOR_MODIFIABLE,
 )
 
+# Controls whether occurrence reads should fetch data from both Snuba and EAP.
+# The EAP data is not used or displayed to the user; the data from each source is
+# only compared and the outcome recorded. This is one global boolean, not per-org.
+register(
+    "eap.occurrences.should_double_read",
+    type=Bool,
+    default=False,
+    flags=FLAG_MODIFIABLE_BOOL | FLAG_AUTOMATOR_MODIFIABLE,
+)
+
+# Allowlist of callsite names that should use EAP data instead of Snuba data.
+# Callsites should only be added after they're known to be safe.
+register(
+    "eap.occurrences.callsites_using_eap_data_allowlist",
+    type=Sequence,
+    default=[],
+    flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
+)
+
+
 # Killswitch for LLM issue detection
 register(
     "issue-detection.llm-detection.enabled",

@@ -0,0 +1,40 @@
+from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
+from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
+
+from sentry.search.eap.columns import ColumnDefinitions, ResolvedAttribute
+from sentry.search.eap.common_columns import COMMON_COLUMNS
+
+# Attribute keys that, per the constant's name, are always present on occurrence
+# items; currently only the integer-typed `group_id`.
+# NOTE(review): no consumer of this list is visible here - confirm how it is used.
+OCCURRENCES_ALWAYS_PRESENT_ATTRIBUTES = [
+    AttributeKey(name="group_id", type=AttributeKey.Type.TYPE_INT),
+]
+
+
+# Maps each column's public alias to its definition. Occurrence-specific columns
+# come after the common ones, so on an alias clash the occurrence definition wins.
+OCCURRENCE_COLUMNS = {
+    attribute.public_alias: attribute
+    for attribute in [
+        *COMMON_COLUMNS,
+        ResolvedAttribute(
+            public_alias="id",
+            internal_name="sentry.item_id",
+            search_type="string",
+        ),
+        ResolvedAttribute(
+            public_alias="group_id",
+            internal_name="group_id",
+            search_type="integer",
+        ),
+    ]
+}
+
+# Column definitions for the occurrence trace item type. Only plain columns are
+# defined so far: aggregates, formulas, contexts and filter aliases are all empty,
+# and both alias mappings are left unset.
+OCCURRENCE_DEFINITIONS = ColumnDefinitions(
+    aggregates={},  # cf. SPAN_AGGREGATE_DEFINITIONS when we're ready.
+    formulas={},
+    columns=OCCURRENCE_COLUMNS,
+    contexts={},
+    trace_item_type=TraceItemType.TRACE_ITEM_TYPE_OCCURRENCE,
+    filter_aliases={},
+    alias_to_column=None,
+    column_to_alias=None,
+)
@@ -0,0 +1,49 @@
+from collections.abc import Callable
+from typing import Any
+
+from sentry import options
+from sentry.utils import metrics
+
+
+def should_double_read_from_eap() -> bool:
+    """Return the current value of the `eap.occurrences.should_double_read` option."""
+    double_read_enabled = options.get("eap.occurrences.should_double_read")
+    return double_read_enabled
+
+
+def should_callsite_use_eap_data_in_read(callsite: str) -> bool:
+    """Return True when `callsite` is listed in the EAP-data callsite allowlist option."""
+    eap_callsites = options.get("eap.occurrences.callsites_using_eap_data_allowlist")
+    return callsite in eap_callsites
+
+
+def validate_read(
+    snuba_data: Any,
+    eap_data: Any,
+    callsite: str,
+    is_null_result: bool | None = None,
+    reasonable_match_comparator: Callable[[Any, Any], bool] | None = None,
+) -> None:
+    """
+    Compares a read from snuba with the equivalent read from EAP Occurrences and
+    records the outcome as tags on the "eap.occurrences.validate_reads" counter.
+    Nothing is returned; the metric is the only output.
+
+    Inputs:
+      * snuba_data: Some data from Snuba (e.g. dict[str, str])
+      * eap_data: Some data from EAP (of format expecting to match snuba_data)
+      * callsite: Where your read is taking place.
+      * is_null_result: Whether the result is a "null result" (e.g. empty array). This
+          helps us to determine whether a "match" is significant.
+      * reasonable_match_comparator: None, or a function taking snuba_data & eap_data and
+          returning True if the read is "reasonable" and False otherwise.
+
+    Tags:
+      * callsite: the callsite as passed in.
+      * exact_match: result of `snuba_data == eap_data`.
+      * source_of_truth: "eap" if the callsite is on the EAP allowlist, else "snuba".
+      * is_null_result / reasonable_match: only set when the matching argument is given.
+    """
+    # Evaluate the equality first, then the allowlist lookup, so the tag values are
+    # computed in the same order as they appear in the tag dict.
+    exact_match = snuba_data == eap_data
+    uses_eap = should_callsite_use_eap_data_in_read(callsite)
+
+    metric_tags: dict[str, Any] = {
+        "callsite": callsite,
+        "exact_match": exact_match,
+        "source_of_truth": "eap" if uses_eap else "snuba",
+    }
+    if is_null_result is not None:
+        metric_tags["is_null_result"] = is_null_result
+    if reasonable_match_comparator is not None:
+        metric_tags["reasonable_match"] = reasonable_match_comparator(snuba_data, eap_data)
+
+    metrics.incr("eap.occurrences.validate_reads", tags=metric_tags)
diff --git a/src/sentry/snuba/occurrences_rpc.py b/src/sentry/snuba/occurrences_rpc.py
@@ -0,0 +1,114 @@
+import logging
+
+import sentry_sdk
+from sentry_protos.snuba.v1.request_common_pb2 import PageToken
+
+from sentry.search.eap.columns import ColumnDefinitions, ResolvedAttribute
+from sentry.search.eap.occurrences.definitions import OCCURRENCE_DEFINITIONS
+from sentry.search.eap.resolver import SearchResolver
+from sentry.search.eap.types import AdditionalQueries, EAPResponse, SearchResolverConfig
+from sentry.search.events.types import SAMPLING_MODES, SnubaParams
+from sentry.snuba import rpc_dataset_common
+
+logger = logging.getLogger("sentry.snuba.occurrences_rpc")
+
+
+class OccurrencesRPC(rpc_dataset_common.RPCBase):
+    """Table-query entry points for occurrence data, resolved via OCCURRENCE_DEFINITIONS."""
+
+    DEFINITIONS = OCCURRENCE_DEFINITIONS
+
+    @classmethod
+    @sentry_sdk.trace
+    def run_table_query(
+        cls,
+        *,
+        params: SnubaParams,
+        query_string: str,
+        selected_columns: list[str],
+        orderby: list[str] | None,
+        offset: int,
+        limit: int,
+        referrer: str,
+        config: SearchResolverConfig,
+        sampling_mode: SAMPLING_MODES | None = None,
+        equations: list[str] | None = None,
+        search_resolver: SearchResolver | None = None,
+        page_token: PageToken | None = None,
+        additional_queries: AdditionalQueries | None = None,
+        debug: bool = False,
+    ) -> EAPResponse:
+        """
+        Runs a table query using this class's column definitions.
+
+        Uses search_resolver when one is given; otherwise a resolver is built from
+        params and config via cls.get_resolver.
+
+        NOTE(review): additional_queries is accepted but is not forwarded to the
+        TableQuery below - confirm whether dropping it is intentional.
+        """
+        return cls._run_table_query(
+            rpc_dataset_common.TableQuery(
+                query_string=query_string,
+                selected_columns=selected_columns,
+                equations=equations,
+                orderby=orderby,
+                offset=offset,
+                limit=limit,
+                referrer=referrer,
+                sampling_mode=sampling_mode,
+                resolver=search_resolver or cls.get_resolver(params, config),
+                page_token=page_token,
+            ),
+            debug,
+        )
+
+    @classmethod
+    @sentry_sdk.trace
+    def run_table_query_with_tags(
+        cls,
+        tag_names: set[str],
+        *,
+        params: SnubaParams,
+        query_string: str,
+        selected_columns: list[str],
+        orderby: list[str] | None,
+        offset: int,
+        limit: int,
+        referrer: str,
+        config: SearchResolverConfig,
+        sampling_mode: SAMPLING_MODES | None = None,
+        equations: list[str] | None = None,
+        page_token: PageToken | None = None,
+        additional_queries: AdditionalQueries | None = None,
+        debug: bool = False,
+    ) -> EAPResponse:
+        """
+        Runs a table query whose resolver also knows one string column per tag.
+
+        tag_names must hold bare tag keys, e.g. {"foo", "bar"}. Each key is
+        registered under the alias tags[<key>] (tags[foo], tags[bar]). Do not pass
+        names that are already wrapped, or the alias becomes tags[tags[foo]].
+
+        The tag columns are only made resolvable here; selected_columns is passed
+        through unchanged, so callers list the tags[...] aliases they want returned.
+
+        NOTE(review): additional_queries is accepted but is not forwarded to the
+        TableQuery below - confirm whether dropping it is intentional.
+        """
+
+        # Go through cls (not the concrete class name) so a subclass that overrides
+        # DEFINITIONS is honoured, matching how cls._run_table_query is dispatched.
+        base_definitions = cls.DEFINITIONS
+
+        # Copy so the class-level column mapping is never mutated by a single query.
+        columns = base_definitions.columns.copy()
+        for name in tag_names:
+            tag_alias = f"tags[{name}]"
+            columns[tag_alias] = ResolvedAttribute(
+                public_alias=tag_alias,
+                internal_name=tag_alias,
+                search_type="string",
+            )
+
+        definitions = ColumnDefinitions(
+            aggregates=base_definitions.aggregates,
+            formulas=base_definitions.formulas,
+            columns=columns,
+            contexts=base_definitions.contexts,
+            trace_item_type=base_definitions.trace_item_type,
+            filter_aliases=base_definitions.filter_aliases,
+            alias_to_column=base_definitions.alias_to_column,
+            column_to_alias=base_definitions.column_to_alias,
+        )
+
+        return cls._run_table_query(
+            rpc_dataset_common.TableQuery(
+                query_string=query_string,
+                selected_columns=selected_columns,
+                equations=equations,
+                orderby=orderby,
+                offset=offset,
+                limit=limit,
+                referrer=referrer,
+                sampling_mode=sampling_mode,
+                resolver=SearchResolver(params=params, config=config, definitions=definitions),
+                page_token=page_token,
+            ),
+            debug,
+        )