diff --git a/src/lgtm_ai/formatters/markdown.py b/src/lgtm_ai/formatters/markdown.py index dced7b5..6c2ddae 100644 --- a/src/lgtm_ai/formatters/markdown.py +++ b/src/lgtm_ai/formatters/markdown.py @@ -19,10 +19,10 @@ class MarkDownFormatter(Formatter[str]): def __init__(self) -> None: template_dir = pathlib.Path(__file__).parent / "templates" - self.env = Environment(loader=FileSystemLoader(template_dir), autoescape=True) + self._template_env = Environment(loader=FileSystemLoader(template_dir), autoescape=True) def format_review_summary_section(self, review: Review, comments: list[ReviewComment] | None = None) -> str: - template = self.env.get_template(self.REVIEW_SUMMARY_TEMPLATE) + template = self._template_env.get_template(self.REVIEW_SUMMARY_TEMPLATE) comments_section = self.format_review_comments_section(comments or []) metadata = self._format_metadata(review.metadata) return template.render( @@ -36,12 +36,12 @@ def format_review_summary_section(self, review: Review, comments: list[ReviewCom def format_review_comments_section(self, comments: list[ReviewComment]) -> str: if not comments: return "" - template = self.env.get_template(self.REVIEW_COMMENTS_SECTION_TEMPLATE) + template = self._template_env.get_template(self.REVIEW_COMMENTS_SECTION_TEMPLATE) rendered_comments = [self.format_review_comment(comment, with_footer=False) for comment in comments] return template.render(comments=rendered_comments) def format_review_comment(self, comment: ReviewComment, *, with_footer: bool = True) -> str: - template = self.env.get_template(self.REVIEW_COMMENT_TEMPLATE) + template = self._template_env.get_template(self.REVIEW_COMMENT_TEMPLATE) header_category = CATEGORY_MAP[comment.category] severity_icon = SEVERITY_MAP[comment.severity] snippet = self._format_snippet(comment) if comment.quote_snippet else None @@ -59,7 +59,7 @@ def format_review_comment(self, comment: ReviewComment, *, with_footer: bool = T ) def format_guide(self, guide: ReviewGuide) -> str: - template = 
self.env.get_template(self.REVIEW_GUIDE_TEMPLATE) + template = self._template_env.get_template(self.REVIEW_GUIDE_TEMPLATE) key_changes = guide.guide_response.key_changes checklist = guide.guide_response.checklist references = guide.guide_response.references @@ -73,16 +73,16 @@ def format_guide(self, guide: ReviewGuide) -> str: ) def _format_snippet(self, comment: ReviewComment) -> str: - template = self.env.get_template(self.SNIPPET_TEMPLATE) + template = self._template_env.get_template(self.SNIPPET_TEMPLATE) return template.render(language=comment.programming_language.lower(), snippet=comment.quote_snippet) def _format_usages_summary(self, usages: list[Usage]) -> str: - template = self.env.get_template(self.USAGES_SUMMARY_TEMPLATE) + template = self._template_env.get_template(self.USAGES_SUMMARY_TEMPLATE) total_tokens = sum(usage.total_tokens or 0 for usage in usages) return template.render(usages=usages, total_tokens=total_tokens) def _format_metadata(self, metadata: PublishMetadata) -> str: - template = self.env.get_template(self.METADATA_TEMPLATE) + template = self._template_env.get_template(self.METADATA_TEMPLATE) usages_summary = self._format_usages_summary(metadata.usages) return template.render( uuid=metadata.uuid, diff --git a/src/lgtm_ai/review/prompt_generators.py b/src/lgtm_ai/review/prompt_generators.py index 76b465d..0712669 100644 --- a/src/lgtm_ai/review/prompt_generators.py +++ b/src/lgtm_ai/review/prompt_generators.py @@ -1,6 +1,9 @@ import json import logging +import pathlib +from typing import ClassVar +from jinja2 import Environment, FileSystemLoader from lgtm_ai.ai.schemas import AdditionalContext, ReviewResponse from lgtm_ai.base.exceptions import NothingToReviewError from lgtm_ai.base.utils import file_matches_any_pattern @@ -13,10 +16,16 @@ class PromptGenerator: """Generates the prompts for the AI model to review the PR.""" + REVIEW_TEMPLATE: ClassVar[str] = "review_prompt.txt.j2" + SUMMARIZING_TEMPLATE: ClassVar[str] = 
"summarizing_prompt.txt.j2" + def __init__(self, config: ResolvedConfig, pr_metadata: PRMetadata) -> None: self.config = config self.pr_metadata = pr_metadata + template_dir = pathlib.Path(__file__).parent / "templates" + self._template_env = Environment(loader=FileSystemLoader(template_dir), autoescape=False) # noqa: S701 + def generate_review_prompt( self, *, pr_diff: PRDiff, context: PRContext, additional_context: list[AdditionalContext] | None = None ) -> str: @@ -24,35 +33,12 @@ def generate_review_prompt( It includes the diff and the context of the PR, formatted for the AI to receive. """ - # PR metadata section - pr_metadata_prompt = self._pr_metadata_prompt(self.pr_metadata) - # Diff section - diff_prompt = self._pr_diff_prompt(pr_diff) - - # Context section - context_prompt = "" - if context: - all_file_contexts = [ - self._generate_context_prompt_for_file(file_context) for file_context in context.file_contents - ] - context_prompt = "Context:\n" - context_prompt += "\n\n".join(all_file_contexts) - - if additional_context: - all_additional_contexts = [self._generate_additional_context_prompt(ac) for ac in additional_context] - add_context_prompt = "Additional context:\n" - - if context_prompt: - context_prompt += f"\n{add_context_prompt}" - else: - context_prompt = add_context_prompt - - context_prompt += "\n\n".join(all_additional_contexts) - - return ( - f"{pr_metadata_prompt}\n{diff_prompt}\n{context_prompt}" - if context or additional_context - else f"{pr_metadata_prompt}\n{diff_prompt}" + template = self._template_env.get_template(self.REVIEW_TEMPLATE) + return template.render( + metadata=self.pr_metadata, + diff=self._serialize_pr_diff(pr_diff), + context=self._filter_context_based_on_exclusions(context.file_contents), + additional_context=additional_context, ) def generate_summarizing_prompt(self, *, pr_diff: PRDiff, raw_review: ReviewResponse) -> str: @@ -60,10 +46,12 @@ def generate_summarizing_prompt(self, *, pr_diff: PRDiff, raw_review: 
ReviewResp It includes the diff and the review, formatted for the AI to receive. """ - pr_metadata_prompt = self._pr_metadata_prompt(self.pr_metadata) - diff_prompt = self._pr_diff_prompt(pr_diff) - review_prompt = f"Review: {raw_review.model_dump()}\n" - return f"{pr_metadata_prompt}\n{diff_prompt}\n{review_prompt}" + template = self._template_env.get_template(self.SUMMARIZING_TEMPLATE) + return template.render( + metadata=self.pr_metadata, + diff=self._serialize_pr_diff(pr_diff), + review=raw_review.model_dump(), + ) def generate_guide_prompt( self, *, pr_diff: PRDiff, context: PRContext, additional_context: list[AdditionalContext] | None = None @@ -72,31 +60,10 @@ def generate_guide_prompt( pr_diff=pr_diff, context=context, additional_context=additional_context ) # FIXME: They are the same for now? - def _generate_context_prompt_for_file(self, file_context: PRContextFileContents) -> str: - """Generate context prompt for a single file in the PR. - - It excludes files according to the `exclude` patterns in the config. 
- """ - if file_matches_any_pattern(file_context.file_path, self.config.exclude): - logger.debug("Excluding file %s from context", file_context.file_path) - return "" - - content = self._indent(file_context.content) - return f" ```{file_context.file_path}, branch={file_context.branch}\n{content}\n ```" - - def _generate_additional_context_prompt(self, additional_context: AdditionalContext) -> str: - if not additional_context.context: - return "" - context = self._indent(additional_context.context) - return f" ```file={additional_context.file_url}; prompt={additional_context.prompt}\n{context}\n ```" - - def _pr_diff_prompt(self, pr_diff: PRDiff) -> str: - return f"PR Diff:\n ```\n{self._indent(self._serialize_pr_diff(pr_diff))}\n ```" - - def _pr_metadata_prompt(self, pr_metadata: PRMetadata) -> str: - return "PR Metadata:\n" + self._indent( - f"```Title\n{pr_metadata.title}\n```\n" + f"```Description\n{pr_metadata.description or ''}\n```\n" - ) + def _filter_context_based_on_exclusions( + self, file_context: list[PRContextFileContents] + ) -> list[PRContextFileContents]: + return [fc for fc in file_context if not file_matches_any_pattern(fc.file_path, self.config.exclude)] def _serialize_pr_diff(self, pr_diff: PRDiff) -> str: """Serialize the PR diff to a JSON string for the AI model. 
@@ -116,8 +83,3 @@ def _serialize_pr_diff(self, pr_diff: PRDiff) -> str: if not keep: raise NothingToReviewError(exclude=self.config.exclude) return json.dumps(keep) - - def _indent(self, text: str, level: int = 4) -> str: - """Indent the text by a given number of spaces.""" - indent = " " * level - return "\n".join(f"{indent}{line}" for line in text.splitlines()) diff --git a/src/lgtm_ai/review/templates/review_prompt.txt.j2 b/src/lgtm_ai/review/templates/review_prompt.txt.j2 new file mode 100644 index 0000000..93a2e90 --- /dev/null +++ b/src/lgtm_ai/review/templates/review_prompt.txt.j2 @@ -0,0 +1,26 @@ +PR METADATA: +- Title: {{ metadata.title }} +- Description: {{ metadata.description }} + +PR DIFF: +``` +{{ diff }} +``` + +{% if context %} +CONTEXT: +{% for context_file in context %} +```file={{ context_file.file_path }}, branch={{ context_file.branch }} +{{ context_file.content }} +``` +{% endfor -%} +{% endif -%} + +{% if additional_context %} +ADDITIONAL CONTEXT: +{% for additional in additional_context %} +```file={{ additional.file_url }}, prompt={{ additional.prompt }} +{{ additional.context }} +``` +{% endfor -%} +{% endif -%} diff --git a/src/lgtm_ai/review/templates/summarizing_prompt.txt.j2 b/src/lgtm_ai/review/templates/summarizing_prompt.txt.j2 new file mode 100644 index 0000000..e914e17 --- /dev/null +++ b/src/lgtm_ai/review/templates/summarizing_prompt.txt.j2 @@ -0,0 +1,13 @@ +PR METADATA: +- Title: {{ metadata.title }} +- Description: {{ metadata.description }} + +PR DIFF: +``` +{{ diff }} +``` + +REVIEW: +``` +{{ review }} +``` diff --git a/tests/review/test_reviewer.py b/tests/review/test_reviewer.py index ea45666..c91ceec 100644 --- a/tests/review/test_reviewer.py +++ b/tests/review/test_reviewer.py @@ -18,6 +18,7 @@ from pydantic_ai.messages import ModelMessage, ModelRequest from pydantic_ai.models.openai import OpenAIModel from pydantic_ai.models.test import TestModel +from pydantic_ai.usage import Usage from 
tests.review.utils import MOCK_DIFF, MockGitClient # This is a safety measure to make sure we don't accidentally make real requests to the LLM while testing, @@ -59,6 +60,11 @@ def test_get_review_from_url_valid() -> None: prompt="These are the development guidelines for the project. Please follow them.", context="contents-of-dev-guidelines", ), + AdditionalContext( + file_url=None, + prompt="Yet another prompt", + context="yet-another-context", + ), ) ), ) @@ -76,38 +82,98 @@ def test_get_review_from_url_valid() -> None: ) # There are messages with the correct prompts to the AI agent + expected_message = ( + textwrap.dedent( + f""" + PR METADATA: + - Title: foo + - Description: bar + + PR DIFF: + ``` + {json.dumps([diff.model_dump() for diff in MOCK_DIFF])} + ``` + + + CONTEXT: + + ```file=file1.txt, branch=source + contents-of-file-1-context + ``` + + ```file=file2.txt, branch=source + contents-of-file-2-context + ``` + + ADDITIONAL CONTEXT: + + ```file=None, prompt=These are the development guidelines for the project. Please follow them. 
+ contents-of-dev-guidelines + ``` + + ```file=None, prompt=Yet another prompt + yet-another-context + ``` + """ + ).strip() + + "\n" + ) + _assert_agent_message( + messages, + expected_message, + expected_messages=4, + expected_prompts=["system-prompt", "system-prompt", "system-prompt", "user-prompt"], + ) + + +def test_summarizing_message_in_review() -> None: + test_agent = mock.Mock() + test_summarizing_agent = get_summarizing_agent_with_settings() + test_agent.run_sync.return_value = mock.Mock( + output=ReviewResponse(summary="a", raw_score=1), + usage=lambda: Usage(requests=1, request_tokens=1041, response_tokens=6, total_tokens=1047), + ) + + with ( + test_summarizing_agent.override( + model=TestModel(), + ), + capture_run_messages() as messages, + ): + code_reviewer = CodeReviewer( + reviewer_agent=test_agent, + summarizing_agent=test_summarizing_agent, + model=mock.Mock(spec=OpenAIModel, model_name=DEFAULT_AI_MODEL), + git_client=MockGitClient(), + config=ResolvedConfig(), + ) + code_reviewer.review_pull_request(pr_url=PRUrl(full_url="foo", repo_path="foo", pr_number=1, source="gitlab")) + + review = {"summary": "a", "comments": [], "raw_score": 1, "score": "Abandon"} expected_message = textwrap.dedent( f""" - PR Metadata: - ```Title - foo - ``` - ```Description - bar - ``` - PR Diff: - ``` - {json.dumps([diff.model_dump() for diff in MOCK_DIFF])} - ``` - Context: - ```file1.txt, branch=source - contents-of-file-1-context - ``` - - ```file2.txt, branch=source - contents-of-file-2-context - ``` - Additional context: - ```file=None; prompt=These are the development guidelines for the project. Please follow them. 
- contents-of-dev-guidelines - ``` + PR METADATA: + - Title: foo + - Description: bar + + PR DIFF: + ``` + {json.dumps([diff.model_dump() for diff in MOCK_DIFF])} + ``` + + REVIEW: + ``` + {review} + ``` + + """ ).strip() _assert_agent_message( messages, expected_message, - expected_messages=4, - expected_prompts=["system-prompt", "system-prompt", "system-prompt", "user-prompt"], + expected_messages=3, + expected_prompts=["system-prompt", "system-prompt", "user-prompt"], )