mozilla · suhaibmujahid · Jul 6, 2025
diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py
@@ -52,7 +52,7 @@ class InlineComment:
     hunk_end_line: int | None = None
     is_generated: bool | None = None
     explanation: str | None = None
-    order: int | None = None
+    severity: int | None = None
 
 
 class ModelResultError(Exception):
@@ -95,13 +95,15 @@ class LargeDiffError(Exception):
    * Detect bugs, logical errors, performance concerns, security issues, or violations of the `{target_code_consistency}` coding standards.
    * Focus only on **new or changed lines** (lines beginning with `+`).
 
-3. **Assess Confidence and Order**:
+3. **Assign Comment Severity**:
+
+   * For each comment, assign a severity level:
+        - `1`: Critical issues that must be fixed before merging.
+        - `2`: Important issues that should be addressed.
+        - `3`: Minor issues that can be fixed later or are stylistic.
+   * If you are unsure about the severity, use `3` as a default.
+
 
-   * **Sort the comments by descending confidence and importance**:
-     * Start with issues you are **certain are valid**.
-     * Also, prioritize important issues that you are **confident about**.
-     * Follow with issues that are **plausible but uncertain** (possible false positives).
-   * Assign each comment a numeric `order`, starting at 1.
 
 4. **Write Clear, Constructive Comments**:
 
@@ -128,7 +130,7 @@ class LargeDiffError(Exception):
 * `"code_line"`: The number of the specific changed line of code that the comment refers to.
 * `"comment"`: A concise review comment.
 * `"explanation"`: A brief rationale for the comment, including how confident you are and why.
-* `"order"`: An integer indicating the comment’s priority (1 = highest confidence/importance).
+* `"severity"`: An integer from 1 to 3 indicating the severity of the issue (1 = critical, 2 = important, 3 = minor).
 
 ---
 
@@ -1121,7 +1123,7 @@ def generate_processed_output(output: str, patch: PatchSet) -> Iterable[InlineCo
             content=comment["comment"],
             on_removed_code=not scope["has_added_lines"],
             explanation=comment["explanation"],
-            order=comment["order"],
+            severity=comment["severity"],
         )
 
 

diff --git a/scripts/code_review_tool_evaluator.py b/scripts/code_review_tool_evaluator.py
@@ -130,7 +130,7 @@ def evaluate_diff_comments(
         results = [
             {
                 "new_comment": comment.content,
-                "new_comment_order": comment.order,
+                "new_comment_severity": comment.severity,
                 "old_comments_count": 0,
                 "matched": False,
             }
@@ -326,7 +326,7 @@ def get_file(commit_hash, path):
     if is_variant_selected("llm-gpt-4.1"):
         tool_variants.append(
             (
-                "with-order",
+                "with-severity",
                 code_review.CodeReviewTool(
                     comment_gen_llms=[
                         generative_model_tool.create_openai_llm(
@@ -441,7 +441,7 @@ def main(args):
         "revision_id",
         "diff_id",
         "new_comment",
-        "new_comment_order",
+        "new_comment_severity",
         "old_comments_count",
         "matched",
         "old_comment",