Skip to content

Commit f8ebf07

Browse files
feat(feedback): heuristics for when to not include an associated label (#97735)
1 parent af3d5d4 commit f8ebf07

File tree

2 files changed

+260
-51
lines changed

2 files changed

+260
-51
lines changed

src/sentry/feedback/endpoints/organization_feedback_categories.py

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,18 @@
3535

3636
MAX_RETURN_CATEGORIES = 4
3737

38-
MAX_ASSOCIATED_LABELS = 12
38+
# Max labels in a label group (including the primary label)
39+
MAX_GROUP_LABELS = 12
3940

4041
# Number of top labels to pass to Seer to ask for similar labels
4142
NUM_TOP_LABELS = 6
4243

4344
# Two days because the largest granularity we cache at is the day
4445
CATEGORIES_CACHE_TIMEOUT = 172800
4546

47+
# If the number of feedbacks is fewer than this, we don't ask for associated labels
48+
THRESHOLD_TO_GET_ASSOCIATED_LABELS = 50
49+
4650

4751
class LabelGroupFeedbacksContext(TypedDict):
4852
"""Corresponds to LabelGroupFeedbacksContext in Seer."""
@@ -192,9 +196,16 @@ def get(self, request: Request, organization: Organization) -> Response:
192196
feedbacks_context=context_feedbacks,
193197
)
194198

195-
label_groups: list[FeedbackLabelGroup] = json.loads(
196-
make_seer_request(seer_request).decode("utf-8")
197-
)["data"]
199+
if len(context_feedbacks) >= THRESHOLD_TO_GET_ASSOCIATED_LABELS:
200+
label_groups: list[FeedbackLabelGroup] = json.loads(
201+
make_seer_request(seer_request).decode("utf-8")
202+
)["data"]
203+
else:
204+
# If there are fewer than THRESHOLD_TO_GET_ASSOCIATED_LABELS feedbacks, we don't ask for associated labels
205+
# The more feedbacks there are, the better the LLM does at generating associated labels, since it has more context
206+
label_groups = [
207+
FeedbackLabelGroup(primaryLabel=label, associatedLabels=[]) for label in top_labels
208+
]
198209

199210
# If the LLM just forgets or adds extra primary labels, log it but still generate categories
200211
if len(label_groups) != len(top_labels):
@@ -214,11 +225,48 @@ def get(self, request: Request, organization: Organization) -> Response:
214225
extra={"label_group": label_group},
215226
)
216227

217-
# Converts label_groups (which maps primary label to associated labels) to a list of lists, where the first element is the primary label and the rest are the associated labels
218-
label_groups_lists: list[list[str]] = [
219-
[label_group["primaryLabel"]] + label_group["associatedLabels"][:MAX_ASSOCIATED_LABELS]
220-
for label_group in label_groups
221-
]
228+
# Sometimes, the LLM will give us associated labels that, to put it bluntly, are not associated labels.
229+
# For example, if the primary label is "Navigation", the LLM might give us "Usability" or "User Interface" as associated labels.
230+
# In a case like that, "Usability" and "User Interface" are obviously more general, so will most likely have more feedbacks associated with them than "Navigation".
231+
# One way to filter these out is to check the counts of each associated label, and compare that to the counts of the primary label.
232+
# If the count of the associated label is >3/4 of the count of the primary label, we can assume that the associated label is not a valid associated label.
233+
# Even if it is valid, we don't really care, it matters more that we get rid of it in the situations that it is invalid (which is pretty often).
234+
235+
# Stores each label as an individual label group (so a list of lists, each inside list containing a single label)
236+
# This is done to get the counts of each label individually, so we can filter out invalid associated labels
237+
flattened_label_groups: list[list[str]] = []
238+
for label_group in label_groups:
239+
flattened_label_groups.append([label_group["primaryLabel"]])
240+
flattened_label_groups.extend([[label] for label in label_group["associatedLabels"]])
241+
242+
individual_label_counts = query_label_group_counts(
243+
organization_id=organization.id,
244+
project_ids=numeric_project_ids,
245+
start=start,
246+
end=end,
247+
labels_groups=flattened_label_groups,
248+
)
249+
250+
label_to_count = {}
251+
for label_lst, count in zip(flattened_label_groups, individual_label_counts):
252+
label_to_count[label_lst[0]] = count
253+
254+
label_groups_lists: list[list[str]] = []
255+
for i, label_group in enumerate(label_groups):
256+
primary_label = label_group["primaryLabel"]
257+
associated_labels = label_group["associatedLabels"]
258+
label_groups_lists.append([primary_label])
259+
for associated_label in associated_labels:
260+
# Once we have MAX_GROUP_LABELS total labels, stop adding more
261+
if len(label_groups_lists[i]) >= MAX_GROUP_LABELS:
262+
break
263+
# Ensure the associated label has feedbacks associated with it, and it doesn't have *too many* feedbacks associated with it
264+
# Worst case, if the associated label is wrong, <= 3/4 of the feedbacks associated with it are wrong
265+
if (
266+
label_to_count[associated_label] * 4 <= label_to_count[primary_label] * 3
267+
and label_to_count[associated_label] != 0
268+
):
269+
label_groups_lists[i].append(associated_label)
222270

223271
# label_groups_lists might be empty if the LLM just decides not to give us any primary labels (leading to ValueError, then 500)
224272
# This will be logged since top_labels is guaranteed to be non-empty, but label_groups_lists will be empty

0 commit comments

Comments
 (0)