Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions src/sentry/snuba/occurrences_rpc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from enum import Enum
from re import finditer
from typing import Any

import sentry_sdk
Expand All @@ -12,6 +13,7 @@
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.types import AdditionalQueries, EAPResponse, SearchResolverConfig
from sentry.search.events.types import SAMPLING_MODES, SnubaParams
from sentry.services.eventstore.query_preprocessing import get_all_merged_group_ids
from sentry.snuba import rpc_dataset_common
from sentry.utils.snuba import process_value

Expand Down Expand Up @@ -55,7 +57,7 @@ def run_table_query(
) -> EAPResponse:
return cls._run_table_query(
rpc_dataset_common.TableQuery(
query_string=query_string,
query_string=cls._update_eap_query_string_with_merged_group_ids(query_string),
selected_columns=selected_columns,
equations=equations,
orderby=orderby,
Expand Down Expand Up @@ -117,7 +119,7 @@ def run_table_query_with_tags(

return cls._run_table_query(
rpc_dataset_common.TableQuery(
query_string=query_string,
query_string=cls._update_eap_query_string_with_merged_group_ids(query_string),
selected_columns=selected_columns,
equations=equations,
orderby=orderby,
Expand Down Expand Up @@ -182,7 +184,7 @@ def run_grouped_timeseries_query(
rpc_request, _aggregates, groupbys_resolved = cls.get_timeseries_query(
search_resolver=search_resolver,
params=params,
query_string=query_string,
query_string=cls._update_eap_query_string_with_merged_group_ids(query_string),
y_axes=y_axes,
groupby=groupby,
referrer=referrer,
Expand Down Expand Up @@ -261,3 +263,40 @@ def _build_category_filter(cls, category: OccurrenceCategory | None) -> TraceIte
)

return None

@staticmethod
def _update_eap_query_string_with_merged_group_ids(qs: str) -> str:
    """Expand ``group_id`` filters in *qs* to also cover merged-in groups.

    Scans the query string for ``group_id:<id>`` and ``group_id:[<id>, ...]``
    filters, looks up all group ids that were merged into (or redirect to)
    the referenced ids via ``get_all_merged_group_ids``, and rewrites the
    filter as a bracketed list when additional ids were found. All other
    text in the query string is passed through unchanged.

    :param qs: the raw EAP query string.
    :return: the query string with ``group_id`` filters expanded.
    """
    pieces: list[str] = []
    cursor = 0
    # \d+ (not \d) inside the brackets: group ids are arbitrary-length
    # integers, e.g. "group_id:[12, 345]".
    for m in finditer(r"group_id:(\d+|\[\d+(, \d+)*\])", qs):
        start, end = m.span()
        # Preserve any text between the previous match and this one.
        pieces.append(qs[cursor:start])

        matched = qs[start:end]
        value = matched.split(":", 1)[1]
        if value.startswith("["):
            # Bracketed list: parse every id. Use a set so the size
            # comparison below is dedup-safe even if the input repeats ids.
            group_ids = {int(gid) for gid in value[1:-1].split(", ")}
        else:
            group_ids = {int(value)}

        all_group_ids = get_all_merged_group_ids(group_ids)

        # Only rewrite when the merge lookup actually found new ids;
        # otherwise keep the original text byte-for-byte.
        if len(all_group_ids) > len(group_ids):
            # Sort for deterministic output: get_all_merged_group_ids
            # returns a set, and set iteration order is not stable for
            # large ids across runs.
            joined = ", ".join(str(gid) for gid in sorted(all_group_ids))
            pieces.append(f"group_id:[{joined}]")
        else:
            pieces.append(matched)

        cursor = end

    # Trailing text after the last match (or the whole string if none).
    pieces.append(qs[cursor:])

    return "".join(pieces)
44 changes: 43 additions & 1 deletion tests/sentry/snuba/test_occurrences_rpc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime, timedelta, timezone
from datetime import UTC, datetime, timedelta, timezone

import pytest
from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import (
Expand All @@ -19,6 +19,7 @@
)

from sentry.exceptions import InvalidSearchQuery
from sentry.models.groupredirect import GroupRedirect
from sentry.search.eap.occurrences.definitions import OCCURRENCE_DEFINITIONS
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.types import SearchResolverConfig
Expand Down Expand Up @@ -225,6 +226,47 @@ def test_type_negation_filter_query(self) -> None:
)
assert having is None

def test_update_eap_query_string_with_merged_group_ids(self) -> None:
    """Group-id filters are expanded to include groups merged via redirects."""
    self.g1 = self.create_group(id=1)
    self.g2 = self.create_group(id=2)
    self.g3 = self.create_group(id=3)
    self.g4 = self.create_group(id=4)

    # Groups 3 and 2 were both merged into group 1 (3 first, then 2).
    org_id = self.g1.project.organization_id
    self.gr31 = GroupRedirect.objects.create(
        id=10001,
        organization_id=org_id,
        group_id=self.g1.id,
        previous_group_id=self.g3.id,
        date_added=datetime.now(UTC) - timedelta(hours=4),
    )
    self.gr21 = GroupRedirect.objects.create(
        id=10002,
        organization_id=org_id,
        group_id=self.g1.id,
        previous_group_id=self.g2.id,
        date_added=datetime.now(UTC) - timedelta(hours=1),
    )

    expand = Occurrences._update_eap_query_string_with_merged_group_ids
    cases = [
        # No group_id filter: untouched.
        ("foo:bar", "foo:bar"),
        # Scalar id with merged groups: rewritten as a bracketed list.
        ("group_id:1", "group_id:[1, 2, 3]"),
        ("group_id:3", "group_id:[1, 3]"),
        # Bracketed list: expanded in place.
        ("group_id:[3, 4]", "group_id:[1, 3, 4]"),
        # Multiple filters plus unrelated terms in one query.
        (
            "group_id:1 foo:bar group_id:[3, 4]",
            "group_id:[1, 2, 3] foo:bar group_id:[1, 3, 4]",
        ),
    ]
    for query, expected in cases:
        assert expand(query) == expected


class OccurrencesTimeseriesTest(TestCase):
def setUp(self) -> None:
Expand Down
Loading