
Commit dc40de8

fix: dedup by computed_name

1 parent e6b347d

2 files changed: +140 -0 lines changed

graphql_api/tests/test_test_analytics.py

Lines changed: 108 additions & 0 deletions
@@ -75,6 +75,53 @@ def mock_storage(mocker):
 rows = [RowFactory()(datetime.datetime(2024, 1, 1 + i)) for i in range(5)]
 
 
+rows_with_duplicate_names = [
+    RowFactory()(datetime.datetime(2024, 1, 1 + i)) for i in range(5)
+]
+for i in range(0, len(rows_with_duplicate_names) - 1, 2):
+    rows_with_duplicate_names[i]["name"] = rows_with_duplicate_names[i + 1]["name"]
+
+
+def dedup(rows: list[dict]) -> list[dict]:
+    by_name = {}
+    for row in rows:
+        if row["name"] not in by_name:
+            by_name[row["name"]] = []
+        by_name[row["name"]].append(row)
+
+    result = []
+    for name, group in by_name.items():
+        if len(group) == 1:
+            result.append(group[0])
+            continue
+
+        weights = [r["total_pass_count"] + r["total_fail_count"] for r in group]
+        total_weight = sum(weights)
+
+        merged = {
+            "name": name,
+            "testsuite": sorted({r["testsuite"] for r in group}),
+            "flags": sorted({flag for r in group for flag in r["flags"]}),
+            "test_id": group[0]["test_id"],  # Keep first test_id
+            "failure_rate": sum(r["failure_rate"] * w for r, w in zip(group, weights))
+            / total_weight,
+            "flake_rate": sum(r["flake_rate"] * w for r, w in zip(group, weights))
+            / total_weight,
+            "updated_at": max(r["updated_at"] for r in group),
+            "avg_duration": sum(r["avg_duration"] * w for r, w in zip(group, weights))
+            / total_weight,
+            "total_fail_count": sum(r["total_fail_count"] for r in group),
+            "total_flaky_fail_count": sum(r["total_flaky_fail_count"] for r in group),
+            "total_pass_count": sum(r["total_pass_count"] for r in group),
+            "total_skip_count": sum(r["total_skip_count"] for r in group),
+            "commits_where_fail": sum(r["commits_where_fail"] for r in group),
+            "last_duration": max(r["last_duration"] for r in group),
+        }
+        result.append(merged)
+
+    return sorted(result, key=lambda x: x["updated_at"], reverse=True)
+
+
 def row_to_camel_case(row: dict) -> dict:
     return {
         "commitsFailed"
@@ -89,6 +136,7 @@ def row_to_camel_case(row: dict) -> dict:
 
 
 test_results_table = pl.DataFrame(rows)
+test_results_table_with_duplicate_names = pl.DataFrame(rows_with_duplicate_names)
 
 
 def base64_encode_string(x: str) -> str:
@@ -143,6 +191,21 @@ def store_in_storage(repository, mock_storage):
     )
 
 
+@pytest.fixture
+def store_in_redis_with_duplicate_names(repository):
+    redis = get_redis_connection()
+    redis.set(
+        f"test_results:{repository.repoid}:{repository.branch}:30",
+        test_results_table_with_duplicate_names.write_ipc(None).getvalue(),
+    )
+
+    yield
+
+    redis.delete(
+        f"test_results:{repository.repoid}:{repository.branch}:30",
+    )
+
+
 class TestAnalyticsTestCase(
     GraphQLTestHelper,
 ):
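The fixture caches the frame in Redis as Arrow IPC bytes via write_ipc(None).getvalue(). The read path is not part of this diff; a hypothetical reader-side sketch, assuming the same key layout (load_cached_results is an invented name):

import io

import polars as pl


def load_cached_results(redis, repoid: int, branch: str) -> pl.DataFrame | None:
    # Hypothetical helper: key layout copied from the fixture above.
    raw = redis.get(f"test_results:{repoid}:{branch}:30")
    if raw is None:
        return None
    # write_ipc(None) produced Arrow IPC bytes, so read_ipc restores the frame.
    return pl.read_ipc(io.BytesIO(raw))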
@@ -583,6 +646,51 @@ def test_gql_query(self, repository, store_in_redis, mock_storage):
             for row in reversed(rows)
         ]
 
+    def test_gql_query_with_duplicate_names(
+        self, repository, store_in_redis_with_duplicate_names, mock_storage
+    ):
+        query = base_gql_query % (
+            repository.author.username,
+            repository.name,
+            """
+            testResults(ordering: { parameter: UPDATED_AT, direction: DESC } ) {
+                totalCount
+                edges {
+                    cursor
+                    node {
+                        name
+                        failureRate
+                        flakeRate
+                        updatedAt
+                        avgDuration
+                        totalFailCount
+                        totalFlakyFailCount
+                        totalPassCount
+                        totalSkipCount
+                        commitsFailed
+                        lastDuration
+                    }
+                }
+            }
+            """,
+        )
+
+        result = self.gql_request(query, owner=repository.author)
+
+        assert (
+            result["owner"]["repository"]["testAnalytics"]["testResults"]["totalCount"]
+            == 3
+        )
+        assert result["owner"]["repository"]["testAnalytics"]["testResults"][
+            "edges"
+        ] == [
+            {
+                "cursor": cursor(row),
+                "node": row_to_camel_case(row),
+            }
+            for row in dedup(rows_with_duplicate_names)
+        ]
+
     def test_gql_query_aggregates(self, repository, store_in_redis, mock_storage):
         query = base_gql_query % (
             repository.author.username,
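Why totalCount == 3: the setup loop copies the name at each odd index onto the even index before it, so of the five generated rows, pairs (0, 1) and (2, 3) share a name and row 4 keeps its own. A quick check of that arithmetic on placeholder names:

# Mirrors the duplicate-name loop in the first hunk above.
names = ["a", "b", "c", "d", "e"]
for i in range(0, len(names) - 1, 2):
    names[i] = names[i + 1]
print(names)            # ['b', 'b', 'd', 'd', 'e']
print(len(set(names)))  # 3 -- five rows collapse to three unique names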

graphql_api/types/test_analytics/test_analytics.py

Lines changed: 32 additions & 0 deletions
@@ -197,6 +197,38 @@ def generate_test_results(
         },
     )
 
+    failure_rate_expr = (
+        pl.col("failure_rate")
+        * (pl.col("total_fail_count") + pl.col("total_pass_count"))
+    ).sum() / (pl.col("total_fail_count") + pl.col("total_pass_count")).sum()
+
+    flake_rate_expr = (
+        pl.col("flake_rate") * (pl.col("total_fail_count") + pl.col("total_pass_count"))
+    ).sum() / (pl.col("total_fail_count") + pl.col("total_pass_count")).sum()
+
+    # dedup
+    table = table.group_by("name").agg(
+        pl.col("test_id").first().alias("test_id"),
+        pl.col("testsuite").implode().alias("testsuite"),
+        pl.col("flags").explode().unique().alias("flags"),
+        failure_rate_expr.alias("failure_rate"),
+        flake_rate_expr.alias("flake_rate"),
+        pl.col("updated_at").max().alias("updated_at"),
+        (
+            (
+                pl.col("avg_duration")
+                * (pl.col("total_pass_count") + pl.col("total_fail_count"))
+            ).sum()
+            / (pl.col("total_pass_count") + pl.col("total_fail_count")).sum()
+        ).alias("avg_duration"),
+        pl.col("total_fail_count").sum().alias("total_fail_count"),
+        pl.col("total_flaky_fail_count").sum().alias("total_flaky_fail_count"),
+        pl.col("total_pass_count").sum().alias("total_pass_count"),
+        pl.col("total_skip_count").sum().alias("total_skip_count"),
+        pl.col("commits_where_fail").sum().alias("commits_where_fail"),
+        pl.col("last_duration").max().alias("last_duration"),
+    )
+
     if term:
         table = table.filter(pl.col("name").str.contains(term))
 
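This group_by/agg is the vectorized counterpart of the test-side dedup helper: counts are summed, updated_at and last_duration take the max, and the three rates are averaged with weight total_pass_count + total_fail_count. A minimal sketch of the same aggregation on toy data (subset of columns, invented values):

import polars as pl

df = pl.DataFrame(
    {
        "name": ["t", "t"],
        "failure_rate": [0.5, 0.1],
        "total_pass_count": [8, 85],
        "total_fail_count": [2, 5],
    }
)
runs = pl.col("total_fail_count") + pl.col("total_pass_count")
out = df.group_by("name").agg(
    ((pl.col("failure_rate") * runs).sum() / runs.sum()).alias("failure_rate"),
    pl.col("total_fail_count").sum().alias("total_fail_count"),
    pl.col("total_pass_count").sum().alias("total_pass_count"),
)
print(out)  # one row: failure_rate 0.14, total_fail_count 7, total_pass_count 93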
