Skip to content

Commit bceadcd

Browse files
authored
fix(discover): Top N queries do not support grouping by array fields (#96032)
- Fix a bug where top events did not correctly work with array fields, and instead just ignored their existence entirely
1 parent fb457a7 commit bceadcd

File tree

4 files changed

+81
-15
lines changed

4 files changed

+81
-15
lines changed

src/sentry/search/events/builder/discover.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ def resolve_top_event_conditions(
401401
resolved_field = self.resolve_column(self.prefixed_to_tag_map.get(field, field))
402402

403403
values: set[Any] = set()
404+
array_values: list[Any] = []
404405
for event in top_events:
405406
if field in event:
406407
alias = field
@@ -410,11 +411,12 @@ def resolve_top_event_conditions(
410411
continue
411412

412413
# Note that because orderby shouldn't be an array field its not included in the values
413-
if isinstance(event.get(alias), list):
414-
continue
414+
event_value = event.get(alias)
415+
if isinstance(event_value, list) and event_value not in array_values:
416+
array_values.append(event_value)
415417
else:
416-
values.add(event.get(alias))
417-
values_list = list(values)
418+
values.add(event_value)
419+
values_list = list(values) + array_values
418420

419421
if values_list:
420422
if field == "timestamp" or field.startswith("timestamp.to_"):
@@ -451,6 +453,21 @@ def resolve_top_event_conditions(
451453
conditions.append(And(conditions=[null_condition, non_none_condition]))
452454
else:
453455
conditions.append(null_condition)
456+
elif any(isinstance(value, list) for value in values_list):
457+
list_conditions = []
458+
for values in values_list:
459+
# This depends on a weird clickhouse behaviour where the best way to compare arrays is to do
460+
# array("foo", "bar") IN array("foo", "bar") == 1
461+
list_conditions.append(
462+
Condition(resolved_field, Op.IN if not other else Op.NOT_IN, values)
463+
)
464+
if len(list_conditions) > 1:
465+
if not other:
466+
conditions.append(Or(conditions=list_conditions))
467+
else:
468+
conditions.append(And(conditions=list_conditions))
469+
else:
470+
conditions.extend(list_conditions)
454471
else:
455472
conditions.append(
456473
Condition(resolved_field, Op.IN if not other else Op.NOT_IN, values_list)

src/sentry/snuba/discover.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,10 @@ def create_groupby_dict(
427427
value = result_row.get(field)
428428
if isinstance(value, list):
429429
if len(value) > 0:
430-
value = value[-1]
430+
# Even though frontend renders only the last element, this can cause key overlaps
431+
# For now lets just render this as a list to avoid that problem
432+
# TODO: timeseries can handle this correctly since this value isn't used as a dict key
433+
value = f"[{','.join(value)}]"
431434
else:
432435
value = ""
433436
values.append({"key": field, "value": str(value)})

tests/snuba/api/endpoints/test_organization_events_stats.py

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3136,7 +3136,8 @@ def setUp(self):
31363136
"message": "poof",
31373137
"timestamp": (self.day_ago + timedelta(minutes=2)).isoformat(),
31383138
"user": {"email": self.user.email},
3139-
"tags": {"shared-tag": "yup"},
3139+
"tags": {"shared-tag": "yup", "env": "prod"},
3140+
"exception": {"values": [{"type": "NameError"}, {"type": "FooError"}]},
31403141
"fingerprint": ["group1"],
31413142
},
31423143
"project": self.project2,
@@ -3148,7 +3149,8 @@ def setUp(self):
31483149
"timestamp": (self.day_ago + timedelta(hours=1, minutes=2)).isoformat(),
31493150
"fingerprint": ["group2"],
31503151
"user": {"email": self.user2.email},
3151-
"tags": {"shared-tag": "yup"},
3152+
"tags": {"shared-tag": "yup", "env": "prod"},
3153+
"exception": {"values": [{"type": "NameError"}, {"type": "FooError"}]},
31523154
},
31533155
"project": self.project2,
31543156
"count": 6,
@@ -3159,7 +3161,8 @@ def setUp(self):
31593161
"timestamp": (self.day_ago + timedelta(minutes=2)).isoformat(),
31603162
"fingerprint": ["group3"],
31613163
"user": {"email": "[email protected]"},
3162-
"tags": {"shared-tag": "yup"},
3164+
"tags": {"shared-tag": "yup", "env": "prod"},
3165+
"exception": {"values": [{"type": "NameError"}, {"type": "FooError"}]},
31633166
},
31643167
"project": self.project,
31653168
"count": 5,
@@ -3170,7 +3173,8 @@ def setUp(self):
31703173
"timestamp": (self.day_ago + timedelta(minutes=2)).isoformat(),
31713174
"fingerprint": ["group4"],
31723175
"user": {"email": "[email protected]"},
3173-
"tags": {"shared-tag": "yup"},
3176+
"tags": {"shared-tag": "yup", "env": "prod"},
3177+
"exception": {"values": [{"type": "ValueError"}]},
31743178
},
31753179
"project": self.project,
31763180
"count": 4,
@@ -3180,7 +3184,8 @@ def setUp(self):
31803184
"message": "kinda bad",
31813185
"timestamp": (self.day_ago + timedelta(minutes=2)).isoformat(),
31823186
"user": {"email": self.user.email},
3183-
"tags": {"shared-tag": "yup"},
3187+
"tags": {"shared-tag": "yup", "env": "staging"},
3188+
"exception": {"values": [{"type": "NameError"}, {"type": "FooError"}]},
31843189
"fingerprint": ["group7"],
31853190
},
31863191
"project": self.project,
@@ -3193,7 +3198,8 @@ def setUp(self):
31933198
"timestamp": (self.day_ago + timedelta(minutes=2)).isoformat(),
31943199
"fingerprint": ["group5"],
31953200
"user": {"email": "[email protected]"},
3196-
"tags": {"shared-tag": "yup"},
3201+
"tags": {"shared-tag": "yup", "env": "dev"},
3202+
"exception": {"values": [{"type": "ValueError"}]},
31973203
},
31983204
"project": self.project,
31993205
"count": 2,
@@ -3204,7 +3210,8 @@ def setUp(self):
32043210
"timestamp": (self.day_ago + timedelta(minutes=2)).isoformat(),
32053211
"fingerprint": ["group6"],
32063212
"user": {"email": "[email protected]"},
3207-
"tags": {"shared-tag": "yup"},
3213+
"tags": {"shared-tag": "yup", "env": "dev"},
3214+
"exception": {"values": [{"type": "ValueError"}]},
32083215
},
32093216
"project": self.project,
32103217
"count": 1,
@@ -3250,8 +3257,14 @@ def test_simple_top_events(self):
32503257

32513258
for index, event in enumerate(self.events[:5]):
32523259
message = event.message or event.transaction
3260+
exception = event.get_event_metadata()["type"]
32533261
results = data[
3254-
",".join([message, self.event_data[index]["data"]["user"].get("email", "None")])
3262+
",".join(
3263+
[
3264+
f"{message} {exception}",
3265+
self.event_data[index]["data"]["user"].get("email", "None"),
3266+
]
3267+
)
32553268
]
32563269
assert results["order"] == index
32573270
assert [{"count": self.event_data[index]["count"]}] in [
@@ -3262,6 +3275,38 @@ def test_simple_top_events(self):
32623275
assert other["order"] == 5
32633276
assert [{"count": 3}] in [attrs for _, attrs in other["data"]]
32643277

3278+
def test_top_events_with_array_field(self):
3279+
"""
3280+
Test that when doing a qurey on top events with an array field that its handled correctly
3281+
"""
3282+
3283+
with self.feature(self.enabled_features):
3284+
response = self.client.get(
3285+
self.url,
3286+
data={
3287+
"start": self.day_ago.isoformat(),
3288+
"end": (self.day_ago + timedelta(hours=2)).isoformat(),
3289+
"interval": "1h",
3290+
"project": self.project.id,
3291+
"query": "!error.type:*Exception*",
3292+
"yAxis": "count_unique(user)",
3293+
"orderby": ["-count_unique(user)"],
3294+
"field": ["error.type", "count_unique(user)"],
3295+
"topEvents": "2",
3296+
"dataset": "errors",
3297+
},
3298+
format="json",
3299+
)
3300+
3301+
assert response.status_code == 200, response.content
3302+
3303+
data = response.data
3304+
assert len(data) == 2
3305+
assert "[NameError,FooError]" in data
3306+
assert "[ValueError]" in data
3307+
assert [attrs[0]["count"] for _, attrs in data["[NameError,FooError]"]["data"]] == [2, 0]
3308+
assert [attrs[0]["count"] for _, attrs in data["[ValueError]"]["data"]] == [1, 0]
3309+
32653310
def test_top_events_with_projects_other(self):
32663311
with self.feature(self.enabled_features):
32673312
response = self.client.get(
@@ -3318,13 +3363,14 @@ def test_top_events_with_issue(self):
33183363

33193364
for index, event in enumerate(self.events[:4]):
33203365
message = event.message
3366+
exception = event.get_event_metadata()["type"]
33213367
# Because we deleted the group for event 0
33223368
if index == 0 or event.group is None:
33233369
issue = "unknown"
33243370
else:
33253371
issue = event.group.qualified_short_id
33263372

3327-
results = data[",".join([issue, message])]
3373+
results = data[",".join([issue, f"{message} {exception}"])]
33283374
assert results["order"] == index
33293375
assert [{"count": self.event_data[index]["count"]}] in [
33303376
attrs for time, attrs in results["data"]

tests/snuba/api/endpoints/test_organization_events_stats_mep.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -889,7 +889,7 @@ def test_split_decision_for_top_events_errors_widget(self):
889889
assert len(response.data) == 1
890890

891891
# Results are grouped by the error type
892-
assert response.data.get("test_error").get("meta").get(
892+
assert response.data.get("[test_error]").get("meta").get(
893893
"discoverSplitDecision"
894894
) is DashboardWidgetTypes.get_type_name(DashboardWidgetTypes.ERROR_EVENTS)
895895

0 commit comments

Comments
 (0)