From 5d59b5cbc4f2806125f4440030ea9528e0f76991 Mon Sep 17 00:00:00 2001
From: Joseph Sawaya <joseph.sawaya@sentry.io>
Date: Wed, 23 Apr 2025 13:44:23 -0400
Subject: [PATCH 1/2] fix(ta-gql): get testsuites

I made a change previously that made it so the testsuite column in
the dataframe returned by get_results became a list[str] instead of
str but I didn't update get testsuites to reflect that. This fixes
that.

also adds a test for the new V1 format that also verifies it populates
the testsuites correctly and fixes a bug with that format renaming a
column
---
 ...estCase__gql_query_with_new_ta_v1__0.json} |  0
 graphql_api/tests/test_test_analytics.py      | 48 ++++++++++++++++++-
 .../types/test_analytics/test_analytics.py    |  2 +-
 utils/test_results.py                         |  2 +-
 4 files changed, 48 insertions(+), 4 deletions(-)
 rename graphql_api/tests/snapshots/{analytics__TestAnalyticsTestCase__gql_query_with_new_ta__0.json => analytics__TestAnalyticsTestCase__gql_query_with_new_ta_v1__0.json} (100%)

diff --git a/graphql_api/tests/snapshots/analytics__TestAnalyticsTestCase__gql_query_with_new_ta__0.json b/graphql_api/tests/snapshots/analytics__TestAnalyticsTestCase__gql_query_with_new_ta_v1__0.json
similarity index 100%
rename from graphql_api/tests/snapshots/analytics__TestAnalyticsTestCase__gql_query_with_new_ta__0.json
rename to graphql_api/tests/snapshots/analytics__TestAnalyticsTestCase__gql_query_with_new_ta_v1__0.json
diff --git a/graphql_api/tests/test_test_analytics.py b/graphql_api/tests/test_test_analytics.py
index 07501ca25d..a822140dc8 100644
--- a/graphql_api/tests/test_test_analytics.py
+++ b/graphql_api/tests/test_test_analytics.py
@@ -744,7 +744,41 @@ def test_gql_query_flake_aggregates(self, repository, store_in_redis):
             "flakeCount": 1,
         }
 
-    def test_gql_query_with_new_ta(self, mocker, repository, snapshot):
+    def test_gql_query_test_suites(self, repository, store_in_redis):
+        query = base_gql_query % (
+            repository.author.username,
+            repository.name,
+            """
+            testSuites
+            """,
+        )
+
+        result = self.gql_request(query, owner=repository.author)
+
+        assert sorted(result["owner"]["repository"]["testAnalytics"]["testSuites"]) == [
+            "testsuite1",
+            "testsuite2",
+            "testsuite3",
+            "testsuite4",
+            "testsuite5",
+        ]
+
+    def test_gql_query_test_suites_term(self, repository, store_in_redis):
+        query = base_gql_query % (
+            repository.author.username,
+            repository.name,
+            """
+            testSuites(term: "testsuite1")
+            """,
+        )
+
+        result = self.gql_request(query, owner=repository.author)
+
+        assert result["owner"]["repository"]["testAnalytics"]["testSuites"] == [
+            "testsuite1",
+        ]
+
+    def test_gql_query_with_new_ta_v1(self, mocker, repository, snapshot):
         # set the feature flag
         mocker.patch("rollouts.READ_NEW_TA.check_value", return_value=True)
 
@@ -757,7 +791,8 @@ def test_gql_query_with_new_ta(self, mocker, repository, snapshot):
         storage.write_file(
             settings.GCS_BUCKET_NAME,
             f"test_analytics/branch_rollups/{repository.repoid}/{repository.branch}.arrow",
-            test_results_table_no_version.write_ipc(None).getvalue(),
+            test_results_table_v1.write_ipc(None).getvalue(),
+            metadata={"version": "1"},
         )
 
         # run the GQL query
@@ -784,6 +819,7 @@ def test_gql_query_with_new_ta(self, mocker, repository, snapshot):
                     }
                 }
             }
+            testSuites
             """,
         )
 
@@ -804,6 +840,14 @@ def test_gql_query_with_new_ta(self, mocker, repository, snapshot):
             ]
         ]
 
+        assert sorted(result["owner"]["repository"]["testAnalytics"]["testSuites"]) == [
+            "testsuite0",
+            "testsuite1",
+            "testsuite2",
+            "testsuite3",
+            "testsuite4",
+        ]
+
         storage.delete_file(
             settings.GCS_BUCKET_NAME,
             f"test_analytics/branch_rollups/{repository.repoid}/{repository.branch}.arrow",
diff --git a/graphql_api/types/test_analytics/test_analytics.py b/graphql_api/types/test_analytics/test_analytics.py
index 26a3c2e885..3daa186fe5 100644
--- a/graphql_api/types/test_analytics/test_analytics.py
+++ b/graphql_api/types/test_analytics/test_analytics.py
@@ -312,7 +312,7 @@ def get_test_suites(
     if table is None:
         return []
 
-    testsuites = table.select(pl.col("testsuite")).unique()
+    testsuites = table.select(pl.col("testsuite").explode()).unique()
 
     if term:
         testsuites = testsuites.filter(pl.col("testsuite").str.starts_with(term))
diff --git a/utils/test_results.py b/utils/test_results.py
index c42c543646..0ec5b3c36f 100644
--- a/utils/test_results.py
+++ b/utils/test_results.py
@@ -198,7 +198,7 @@ def v1_agg_table(table: pl.LazyFrame) -> pl.LazyFrame:
         pl.col("avg_duration") * (pl.col("pass_count") + pl.col("fail_count"))
     ).sum() / (pl.col("pass_count") + pl.col("fail_count")).sum()
 
-    table = table.group_by("computed_name").agg(
+    table = table.group_by(pl.col("computed_name").alias("name")).agg(
         pl.col("testsuite").alias(
             "testsuite"
         ),  # TODO: filter by this before we aggregate

From 0194300cfc9116a00ccc39103e14587a38fdb487 Mon Sep 17 00:00:00 2001
From: Joseph Sawaya <joseph.sawaya@sentry.io>
Date: Wed, 23 Apr 2025 13:44:23 -0400
Subject: [PATCH 2/2] fix(ta-gql): fill_nan on avg_duration in aggregate

it's possible for the avg_duration to be NaN (which is fine) but I
wasn't taking this into account in the aggregate calculation which
meant that the NaN's were propagating to the total duration and
slowest tests duration so we need to replace them with 0 because
that's effectively what they are
---
 graphql_api/tests/test_test_analytics.py      | 25 +++++++++++++++++++
 .../test_results_aggregates.py                |  2 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/graphql_api/tests/test_test_analytics.py b/graphql_api/tests/test_test_analytics.py
index a822140dc8..bb5fba7e85 100644
--- a/graphql_api/tests/test_test_analytics.py
+++ b/graphql_api/tests/test_test_analytics.py
@@ -819,6 +819,17 @@ def test_gql_query_with_new_ta_v1(self, mocker, repository, snapshot):
                     }
                 }
             }
+            flakeAggregates {
+                flakeRate
+                flakeCount
+            }
+            testResultsAggregates {
+                totalDuration
+                slowestTestsDuration
+                totalFails
+                totalSkips
+                totalSlowTests
+            }
             testSuites
             """,
         )
@@ -848,6 +859,20 @@ def test_gql_query_with_new_ta_v1(self, mocker, repository, snapshot):
             "testsuite4",
         ]
 
+        assert result["owner"]["repository"]["testAnalytics"]["flakeAggregates"] == {
+            "flakeRate": (1 / 15),
+            "flakeCount": 1,
+        }
+
+        assert result["owner"]["repository"]["testAnalytics"][
+            "testResultsAggregates"
+        ] == {
+            "totalDuration": 7500.0,
+            "slowestTestsDuration": 2500.0,
+            "totalFails": 50,
+            "totalSkips": 25,
+            "totalSlowTests": 1,
+        }
         storage.delete_file(
             settings.GCS_BUCKET_NAME,
             f"test_analytics/branch_rollups/{repository.repoid}/{repository.branch}.arrow",
diff --git a/graphql_api/types/test_results_aggregates/test_results_aggregates.py b/graphql_api/types/test_results_aggregates/test_results_aggregates.py
index da99787e3c..0d89aa6abb 100644
--- a/graphql_api/types/test_results_aggregates/test_results_aggregates.py
+++ b/graphql_api/types/test_results_aggregates/test_results_aggregates.py
@@ -24,7 +24,7 @@ class TestResultsAggregates:
 
 
 def calculate_aggregates(table: pl.DataFrame) -> pl.DataFrame:
-    return table.select(
+    return table.with_columns(pl.col("avg_duration").fill_nan(0)).select(
         (
             pl.col("avg_duration")
             * (pl.col("total_pass_count") + pl.col("total_fail_count"))