mozilla · suhaibmujahid · Dec 24, 2025 · Dec 22, 2025 · Dec 24, 2025 · Dec 24, 2025
diff --git a/bugbug/code_search/mozilla.py b/bugbug/code_search/mozilla.py
@@ -10,10 +10,10 @@
 
 
 class FunctionSearchMozilla(FunctionSearch):
-    def __init__(self, repo_dir, get_file, fast=False):
+    def __init__(self, repo_dir, get_file=None, fast=False):
         super().__init__()
         self.repo_dir = repo_dir
-        self.get_file = get_file
+        self.get_file = get_file or FunctionSearchSearchfoxAPI._get_file
         self.fast = fast
 
     def get_function(

diff --git a/bugbug/code_search/searchfox_api.py b/bugbug/code_search/searchfox_api.py
@@ -214,9 +214,18 @@ def search(commit_hash, symbol_name):
 
 
 class FunctionSearchSearchfoxAPI(FunctionSearch):
-    def __init__(self, get_file):
+    def __init__(self, get_file=None):
         super().__init__()
-        self.get_file = get_file
+        self.get_file = get_file or self._get_file
+
+    @staticmethod
+    def _get_file(commit_hash, path):
+        r = utils.get_session("hgmo").get(
+            f"https://hg.mozilla.org/mozilla-unified/raw-file/{commit_hash}/{path}",
+            headers={"User-Agent": utils.get_user_agent()},
+        )
+        r.raise_for_status()
+        return r.text
 
     def definitions_to_results(self, commit_hash, definitions):
         result = []

diff --git a/bugbug/tools/base.py b/bugbug/tools/base.py
@@ -8,13 +8,5 @@
 
 
 class GenerativeModelTool(ABC):
-    @property
-    @abstractmethod
-    def version(self) -> str: ...
-
     @abstractmethod
     def run(self, *args, **kwargs) -> Any: ...
-
-    @staticmethod
-    def _print_answer(answer):
-        print(f"\u001b[33;1m\033[1;3m{answer}\u001b[0m")
diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
@@ -9,44 +9,39 @@
 import os
 from datetime import datetime
 from logging import getLogger
-from typing import Iterable, Optional
+from typing import Optional, Protocol
 
 from langchain.agents import create_agent
 from langchain.agents.structured_output import ProviderStrategy
 from langchain.chat_models import BaseChatModel
 from langchain.messages import HumanMessage
-from langchain_classic.chains import LLMChain
-from langchain_classic.prompts import PromptTemplate
 from langgraph.errors import GraphRecursionError
 from pydantic import BaseModel, Field
 from unidiff import PatchSet
 
 from bugbug.code_search.function_search import FunctionSearch
 from bugbug.tools.base import GenerativeModelTool
-from bugbug.tools.code_review.database import ReviewCommentsDB, SuggestionsFeedbackDB
+from bugbug.tools.code_review.database import ReviewCommentsDB
 from bugbug.tools.code_review.langchain_tools import (
     CodeReviewContext,
     create_find_function_definition_tool,
     expand_context,
 )
 from bugbug.tools.code_review.prompts import (
-    DEFAULT_REJECTED_EXAMPLES,
     FIRST_MESSAGE_TEMPLATE,
-    PROMPT_TEMPLATE_FILTERING_ANALYSIS,
-    PROMPT_TEMPLATE_SUMMARIZATION,
     STATIC_COMMENT_EXAMPLES,
     SYSTEM_PROMPT_TEMPLATE,
     TEMPLATE_COMMENT_EXAMPLE,
     TEMPLATE_PATCH_FROM_HUNK,
 )
 from bugbug.tools.code_review.utils import (
+    convert_generated_comments_to_inline,
     format_patch_set,
-    generate_processed_output,
 )
 from bugbug.tools.core.data_types import InlineComment
 from bugbug.tools.core.exceptions import LargeDiffError, ModelResultError
 from bugbug.tools.core.llms import get_tokenizer
-from bugbug.tools.core.platforms.base import Patch
+from bugbug.tools.core.platforms.base import Patch, ReviewData
 
 logger = getLogger(__name__)
 
@@ -73,25 +68,35 @@ class AgentResponse(BaseModel):
     )
 
 
-class CodeReviewTool(GenerativeModelTool):
-    version = "0.0.1"
+class PatchSummarizer(Protocol):
+    def run(self, patch: Patch) -> str: ...
+
+
+class SuggestionFilterer(Protocol):
+    def run(
+        self, suggestions: list[GeneratedReviewComment]
+    ) -> list[GeneratedReviewComment]: ...
+
 
+class CodeReviewTool(GenerativeModelTool):
     def __init__(
         self,
         llm: BaseChatModel,
-        summarization_llm: BaseChatModel,
-        filtering_llm: BaseChatModel,
+        patch_summarizer: PatchSummarizer,
+        suggestion_filterer: SuggestionFilterer,
+        review_data: ReviewData,
         function_search: Optional[FunctionSearch] = None,
         review_comments_db: Optional["ReviewCommentsDB"] = None,
         show_patch_example: bool = False,
         verbose: bool = True,
-        suggestions_feedback_db: Optional["SuggestionsFeedbackDB"] = None,
         target_software: str = "Mozilla Firefox",
     ) -> None:
         super().__init__()
 
         self.target_software = target_software
 
+        self.review_data = review_data
+
         self._tokenizer = get_tokenizer(
             llm.model_name if hasattr(llm, "model_name") else ""
         )
@@ -108,24 +113,8 @@ def __init__(
                 "----------------------------------------------------"
             )
 
-        self.summarization_chain = LLMChain(
-            prompt=PromptTemplate.from_template(
-                PROMPT_TEMPLATE_SUMMARIZATION,
-                partial_variables={
-                    "experience_scope": f"the {self.target_software} source code"
-                },
-            ),
-            llm=summarization_llm,
-            verbose=verbose,
-        )
-        self.filtering_chain = LLMChain(
-            prompt=PromptTemplate.from_template(
-                PROMPT_TEMPLATE_FILTERING_ANALYSIS,
-                partial_variables={"target_code_consistency": self.target_software},
-            ),
-            llm=filtering_llm,
-            verbose=verbose,
-        )
+        self.patch_summarizer = patch_summarizer
+        self.suggestion_filterer = suggestion_filterer
 
         tools = [expand_context]
         if function_search:
@@ -146,60 +135,77 @@ def __init__(
 
         self.verbose = verbose
 
-        self.suggestions_feedback_db = suggestions_feedback_db
-
     @staticmethod
-    def create(
-        llm=None, summarization_llm=None, filtering_llm=None, **kwargs
-    ) -> "CodeReviewTool":
-        from bugbug.tools.core.llms import create_anthropic_llm
-
-        return CodeReviewTool(
-            llm=llm
-            or create_anthropic_llm(
+    def create(**kwargs):
+        """Factory method to instantiate the tool with default dependencies.
+
+        This method takes the same parameters as the constructor, but all
+        parameters are optional. If a parameter is not provided, a default
+        component will be created and used.
+        """
+        if "function_search" not in kwargs:
+            from bugbug.code_search.searchfox_api import FunctionSearchSearchfoxAPI
+
+            kwargs["function_search"] = FunctionSearchSearchfoxAPI()
+
+        if "review_comments_db" not in kwargs:
+            from bugbug.tools.code_review.database import ReviewCommentsDB
+            from bugbug.vectordb import QdrantVectorDB
+
+            kwargs["review_comments_db"] = ReviewCommentsDB(
+                QdrantVectorDB("diff_comments")
+            )
+
+        if "review_data" not in kwargs:
+            from bugbug.tools.core.platforms.phabricator import PhabricatorReviewData
+
+            kwargs["review_data"] = PhabricatorReviewData()
+
+        if "llm" not in kwargs:
+            from bugbug.tools.core.llms import create_anthropic_llm
+
+            kwargs["llm"] = create_anthropic_llm(
                 model_name="claude-opus-4-5-20251101",
                 max_tokens=40_000,
                 temperature=None,
                 thinking={"type": "enabled", "budget_tokens": 10_000},
-            ),
-            summarization_llm=summarization_llm or create_anthropic_llm(),
-            filtering_llm=filtering_llm or create_anthropic_llm(),
-            **kwargs,
-        )
+            )
 
-    def count_tokens(self, text):
-        return len(self._tokenizer.encode(text))
+        if "patch_summarizer" not in kwargs:
+            from bugbug.tools.patch_summarization.agent import PatchSummarizationTool
 
-    def generate_initial_prompt(self, patch: Patch) -> str:
-        formatted_patch = format_patch_set(patch.patch_set)
+            kwargs["patch_summarizer"] = PatchSummarizationTool.create()
 
-        output_summarization = self.summarization_chain.invoke(
-            {
-                "patch": formatted_patch,
-                "bug_title": patch.bug_title,
-                "patch_title": patch.patch_title,
-                "patch_description": patch.patch_description,
-            },
-            return_only_outputs=True,
-        )["text"]
+        if "suggestion_filterer" not in kwargs:
+            from bugbug.tools.suggestion_filtering.agent import SuggestionFilteringTool
 
-        if self.verbose:
-            GenerativeModelTool._print_answer(output_summarization)
+            kwargs["suggestion_filterer"] = SuggestionFilteringTool.create()
 
+        return CodeReviewTool(**kwargs)
+
+    def count_tokens(self, text):
+        return len(self._tokenizer.encode(text))
+
+    def generate_initial_prompt(self, patch: Patch, patch_summary: str) -> str:
         created_before = patch.date_created if self.is_experiment_env else None
+
         return FIRST_MESSAGE_TEMPLATE.format(
-            patch=formatted_patch,
-            patch_summarization=output_summarization,
+            patch=format_patch_set(patch.patch_set),
+            patch_summarization=patch_summary,
             comment_examples=self._get_comment_examples(patch, created_before),
             approved_examples=self._get_generated_examples(patch, created_before),
         )
 
-    def _generate_suggestions(self, patch: Patch) -> list[GeneratedReviewComment]:
+    def generate_review_comments(
+        self, patch: Patch, patch_summary: str
+    ) -> list[GeneratedReviewComment]:
         try:
             for chunk in self.agent.stream(
                 {
                     "messages": [
-                        HumanMessage(self.generate_initial_prompt(patch)),
+                        HumanMessage(
+                            self.generate_initial_prompt(patch, patch_summary)
+                        ),
                     ]
                 },
                 context=CodeReviewContext(patch=patch),
@@ -212,35 +218,26 @@ def _generate_suggestions(self, patch: Patch) -> list[GeneratedReviewComment]:
 
         return result["structured_response"].comments
 
+    def run_by_diff_id(self, diff_id: str | int) -> list[InlineComment] | None:
+        patch = self.review_data.get_patch_by_id(diff_id)
+        return self.run(patch)
+
     def run(self, patch: Patch) -> list[InlineComment] | None:
         if self.count_tokens(patch.raw_diff) > 21000:
             raise LargeDiffError("The diff is too large")
 
-        unfiltered_suggestions = self._generate_suggestions(patch)
+        patch_summary = self.patch_summarizer.run(patch)
+
+        unfiltered_suggestions = self.generate_review_comments(patch, patch_summary)
         if not unfiltered_suggestions:
             logger.info("No suggestions were generated")
             return []
 
-        rejected_examples = (
-            "\n    - ".join(self.get_similar_rejected_comments(unfiltered_suggestions))
-            if self.suggestions_feedback_db
-            else DEFAULT_REJECTED_EXAMPLES
-        )
-
-        raw_output = self.filtering_chain.invoke(
-            {
-                "comments": str(
-                    [comment.model_dump() for comment in unfiltered_suggestions]
-                ),
-                "rejected_examples": rejected_examples,
-            },
-            return_only_outputs=True,
-        )["text"]
-
-        if self.verbose:
-            GenerativeModelTool._print_answer(raw_output)
+        filtered_suggestions = self.suggestion_filterer.run(unfiltered_suggestions)
 
-        return list(generate_processed_output(raw_output, patch.patch_set))
+        return list(
+            convert_generated_comments_to_inline(filtered_suggestions, patch.patch_set)
+        )
 
     def _get_generated_examples(self, patch, created_before: datetime | None = None):
         """Get examples of comments that were generated by an LLM.
@@ -320,24 +317,3 @@ def generate_formatted_patch_from_raw_hunk(raw_hunk, filename):
             )
             for num, example in enumerate(comment_examples)
         )
-
-    def get_similar_rejected_comments(
-        self, suggestions: list[GeneratedReviewComment]
-    ) -> Iterable[str]:
-        if not self.suggestions_feedback_db:
-            raise Exception("Suggestions feedback database is not available")
-
-        num_examples_per_suggestion = 10 // len(suggestions) or 1
-        seen_ids: set[int] = set()
-
-        for suggestion in suggestions:
-            similar_rejected_suggestions = (
-                self.suggestions_feedback_db.find_similar_rejected_suggestions(
-                    suggestion.comment,
-                    limit=num_examples_per_suggestion,
-                    excluded_ids=seen_ids,
-                )
-            )
-            for rejected_suggestion in similar_rejected_suggestions:
-                seen_ids.add(rejected_suggestion.id)
-                yield rejected_suggestion.comment