3232from bugbug .tools import code_review
3333from bugbug .tools .code_review .utils import parse_model_output
3434from bugbug .tools .core import llms
35+ from bugbug .tools .core .exceptions import ModelResultError
3536from bugbug .vectordb import QdrantVectorDB
3637
3738code_review .TARGET_SOFTWARE = "Mozilla Firefox"
@@ -221,7 +222,9 @@ def print_evaluation_matches(matching_results: list[dict]):
221222 )
222223
223224
224- def get_tool_variants () -> list [tuple [str , code_review .CodeReviewTool ]]:
225+ def get_tool_variants (
226+ variants : list [str ],
227+ ) -> list [tuple [str , code_review .CodeReviewTool ]]:
225228 """Returns a list of tool variants to evaluate.
226229
227230 Returns:
@@ -254,31 +257,33 @@ def get_file(commit_hash, path):
254257
255258 tool_variants = []
256259
257- tool_variants .append (
258- (
259- "Claude" ,
260- code_review .CodeReviewTool (
261- llm = llms .create_anthropic_llm (),
262- function_search = function_search ,
263- review_comments_db = review_comments_db ,
264- suggestions_feedback_db = suggestions_feedback_db ,
265- verbose = VERBOSE_CODE_REVIEW ,
266- ),
260+ if "claude" in variants :
261+ tool_variants .append (
262+ (
263+ "Claude" ,
264+ code_review .CodeReviewTool (
265+ llm = llms .create_anthropic_llm (),
266+ function_search = function_search ,
267+ review_comments_db = review_comments_db ,
268+ suggestions_feedback_db = suggestions_feedback_db ,
269+ verbose = VERBOSE_CODE_REVIEW ,
270+ ),
271+ )
267272 )
268- )
269273
270- tool_variants .append (
271- (
272- "GPT" ,
273- code_review .CodeReviewTool (
274- llm = llms .create_openai_llm (),
275- function_search = function_search ,
276- review_comments_db = review_comments_db ,
277- suggestions_feedback_db = suggestions_feedback_db ,
278- verbose = VERBOSE_CODE_REVIEW ,
279- ),
274+ if "gpt" in variants :
275+ tool_variants .append (
276+ (
277+ "GPT" ,
278+ code_review .CodeReviewTool (
279+ llm = llms .create_openai_llm (),
280+ function_search = function_search ,
281+ review_comments_db = review_comments_db ,
282+ suggestions_feedback_db = suggestions_feedback_db ,
283+ verbose = VERBOSE_CODE_REVIEW ,
284+ ),
285+ )
280286 )
281- )
282287
283288 return tool_variants
284289
@@ -348,25 +353,52 @@ def get_latest_evaluation_results_file(results_dir: str | None):
348353 return latests_files
349354
350355
def get_ongoing_evaluation_results_file(results_dir: str | None):
    """Locate the evaluation results file of an in-progress (resumed) run.

    Scans ``results_dir`` (or the current directory when ``None``) for
    ``evaluation_results_*.csv`` files that sort after the baseline file
    reported by ``get_latest_evaluation_results_file`` and picks the newest
    one. File names embed a ``%Y-%m-%d_%H-%M-%S`` timestamp, so lexicographic
    ordering matches chronological ordering.

    Args:
        results_dir: directory containing the results files, or ``None``
            to look in the current working directory.

    Returns:
        The path to the newest ongoing results file (joined with
        ``results_dir`` when one was given).

    Raises:
        FileNotFoundError: if no ongoing results file exists.
    """
    # Imported locally, mirroring the surrounding file's style for this helper.
    import glob
    import os

    base_file = get_latest_evaluation_results_file(results_dir)

    # Keep only files newer than the baseline; names containing "#" are
    # excluded (presumably editor backup/lock copies — TODO confirm).
    # NOTE(review): `name > base_file` assumes the baseline is a bare file
    # name; if get_latest_evaluation_results_file returns a path joined with
    # results_dir, this comparison should be verified.
    candidates = [
        name
        for name in glob.glob("evaluation_results_*.csv", root_dir=results_dir)
        if "#" not in name and name > base_file
    ]

    if not candidates:
        raise FileNotFoundError("No ongoing evaluation results file found.")

    latest_file = max(candidates)
    return os.path.join(results_dir, latest_file) if results_dir else latest_file
374+
375+
351376def main (args ):
352377 review_platform = "phabricator"
353378 review_data : code_review .ReviewData = code_review .review_data_classes [
354379 review_platform
355380 ]()
356381
357- tool_variants = get_tool_variants ()
382+ tool_variants = get_tool_variants (args . variants )
358383
359384 evaluator = FeedbackEvaluator (args .evaluation_dataset )
360385
361- is_first_result = True
362386 result_file = os .path .join (
363387 args .results_dir ,
364388 "code_review_tool_evaluator.csv" ,
365389 )
366- evaluation_results_file = os .path .join (
367- args .results_dir ,
368- f"evaluation_results_{ datetime .now ().strftime ('%Y-%m-%d_%H-%M-%S' )} .csv" ,
369- )
390+ is_first_result = not os .path .exists (result_file )
391+
392+ if is_first_result :
393+ evaluation_results_file = os .path .join (
394+ args .results_dir ,
395+ f"evaluation_results_{ datetime .now ().strftime ('%Y-%m-%d_%H-%M-%S' )} .csv" ,
396+ )
397+ seen_patches = set ()
398+ else :
399+ evaluation_results_file = get_ongoing_evaluation_results_file (args .results_dir )
400+ seen_patches = set (pd .read_csv (evaluation_results_file )["diff_id" ].to_list ())
401+
370402 result_unique_columns = ["Review Request ID" , "File" , "Line" , "Comment Number" ]
371403 result_all_columns = result_unique_columns + [
372404 f"{ title } ({ variant_name } )"
@@ -421,6 +453,18 @@ def main(args):
421453 )
422454
423455 for review_request_id , review_request in selected_review_requests :
456+ if review_request_id in [227266 , 233414 ]:
457+ print (
458+ f"Skipping Review Request ID { review_request_id } because it is known to cause issues."
459+ )
460+ continue
461+
462+ if review_request .patch_id in seen_patches :
463+ print (
464+ f"Skipping Review Request ID { review_request_id } (Diff ID { review_request .patch_id } ) because it was already evaluated."
465+ )
466+ continue
467+
424468 print ("---------------------------------------------------------" )
425469 print (f"Review Request ID: { review_request_id } " )
426470 print (f"Patch ID: { review_request .patch_id } " )
@@ -443,6 +487,9 @@ def main(args):
443487 except code_review .LargeDiffError :
444488 print ("Skipping the patch because it is too large." )
445489 continue
490+ except ModelResultError as e :
491+ print ("Error while running the tool:" , e )
492+ continue
446493
447494 print_prettified_comments (comments )
448495 comment_per_line_counter = defaultdict (int )
@@ -548,6 +595,14 @@ def main(args):
548595 action = "store" ,
549596 help = "the evaluation strategy to use" ,
550597 )
598+ parser .add_argument (
599+ "--variant" ,
600+ dest = "variants" ,
601+ action = "append" ,
602+ help = "the variants to use, use multiple times for multiple variants" ,
603+ choices = ["claude" , "gpt" ],
604+ required = True ,
605+ )
551606
552607 args = parser .parse_args ()
553608
0 commit comments