branch_worker: PR comments pre-processing

theihor · theihor · commit 40da4351f02a · 2025-10-30T11:07:35.000-07:00
The format of PR comments (which is github-style markdown) may not be
suitable for plain-text email responses, depending on the use-case.

Implement a mechanism for pre-processing the contents of PR comments
before forwarding them with email.

The function transforming the content string must be a method of
BranchWorker class, and then its name can be specified (or not) in the
pr_comments_forwarding of the KPD config.

If pre-processing function is not specified, couldn't be found or
fails, the comment text remains unchanged.

Signed-off-by: Ihor Solodrai &lt;ihor.solodrai@linux.dev&gt;
diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py
@@ -28,7 +28,18 @@
 from enum import Enum
 from pathlib import Path
 from subprocess import PIPE
-from typing import Any, Dict, Final, Generator, IO, List, Optional, Sequence, Tuple
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Final,
+    Generator,
+    IO,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+)
 
 import dateutil.parser
 import git
@@ -1444,6 +1455,40 @@ async def submit_pr_summary(
         )
         pr_summary_report.add(1)
 
+    def ai_review_comment_preprocessor(self, body: str) -> str:
+        # find the first line starting with > and remove everyting prior
+        match = re.search(r"^>.*$", body, re.MULTILINE)
+        if match:
+            body = body[match.start() :]
+        # remove triple backticks
+        body = re.sub(r"^```\s*$\n?", "", body, flags=re.MULTILINE)
+        # remove In-Reply-To-Subject tag
+        body = re.sub(r"^In-Reply-To-Subject:.*$\n?", "", body, flags=re.MULTILINE)
+        return body
+
+    def pr_comment_body_to_email_body(
+        self, cfg: PRCommentsForwardingConfig, comment_body: str
+    ) -> str:
+        func_name: Optional[str] = cfg.body_preprocessor_func
+        if func_name is None:
+            return comment_body
+
+        maybe_func = getattr(self, func_name, None)
+        if not callable(maybe_func):
+            return comment_body
+
+        func: Callable = maybe_func
+        try:
+            email_body = func(comment_body)
+            if not isinstance(email_body, str):
+                return comment_body
+            return email_body
+        except Exception as e:
+            logger.error(
+                f"Comment body preprocessor '{func_name}' failed with exception: {e}"
+            )
+            return comment_body
+
     async def forward_pr_comments(self, pr: PullRequest, series: Series):
         # The checks and local variables are needed to make type checker happy
         if self.email_config is None:
@@ -1510,9 +1555,8 @@ async def forward_pr_comments(self, pr: PullRequest, series: Series):
             patch_msg = parser.parsebytes(mbox, headersonly=True)
 
             # and forward the target comment via email
-            sent_msg_id = await send_pr_comment_email(
-                email_cfg, patch_msg, comment.body
-            )
+            email_body: str = self.pr_comment_body_to_email_body(cfg, comment.body)
+            sent_msg_id = await send_pr_comment_email(email_cfg, patch_msg, email_body)
             if sent_msg_id is not None:
                 logger.info(
                     f"Forwarded PR comment {comment.html_url} via email, Message-Id: {sent_msg_id}"
diff --git a/kernel_patches_daemon/config.py b/kernel_patches_daemon/config.py
@@ -12,7 +12,7 @@
 import os
 import re
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Set
+from typing import Dict, List, Optional
 
 
 logger = logging.getLogger(__name__)
@@ -112,6 +112,7 @@ class PRCommentsForwardingConfig:
     commenter_allowlist: List[str]
     recipient_allowlist: List[re.Pattern]
     recipient_denylist: List[re.Pattern]
+    body_preprocessor_func: Optional[str]
 
     @classmethod
     def from_json(cls, json: Dict) -> "PRCommentsForwardingConfig":
@@ -126,6 +127,7 @@ def from_json(cls, json: Dict) -> "PRCommentsForwardingConfig":
                 re.compile(pattern) for pattern in json.get("recipient_denylist", [])
             ],
             commenter_allowlist=json.get("commenter_allowlist", []),
+            body_preprocessor_func=json.get("body_preprocessor_func", None),
         )
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -68,4 +68,3 @@ project-excludes = [
     "**/__pycache__",
     "**/*venv/**/*",
 ]
-
diff --git a/tests/data/test_ai_review_comment_preprocessor/comment-body.md b/tests/data/test_ai_review_comment_preprocessor/comment-body.md
@@ -0,0 +1,57 @@
+
+
+```
+commit 16cbc4520eb13d2065d145c76b97fb93035d81e8
+Author: KaFai Wan <kafai.wan@linux.dev>
+
+bpf: Fix tnum_overlap to check for zero mask intersection
+
+This patch adds an early return check in tnum_overlap() to detect
+non-overlapping tnums when their masks have no overlapping bits. The
+fix addresses a syzbot report where the verifier failed to recognize
+that two tnums couldn't represent the same value.
+
+Reported-by: syzbot+c950cc277150935cc0b5@syzkaller.appspotmail.com
+
+> diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
+> index f8e70e9c3..1a75b7c9a 100644
+> --- a/kernel/bpf/tnum.c
+> +++ b/kernel/bpf/tnum.c
+> @@ -163,6 +163,8 @@ bool tnum_overlap(struct tnum a, struct tnum b)
+>  {
+>  	u64 mu;
+>
+> +	if (a.mask && b.mask && !(a.mask & b.mask))
+> +		return false;
+                              ^^^^^
+
+Does this logic correctly detect non-overlapping tnums? Consider this
+counter-example:
+
+  a = (value=0x5, mask=0xA)  // binary: value=0101, mask=1010
+  b = (value=0xA, mask=0x5)  // binary: value=1010, mask=0101
+
+The masks are disjoint: (0xA & 0x5) == 0, so the new check returns
+false (no overlap).
+
+However, these tnums actually represent overlapping sets:
+  a represents: {5, 7, 13, 15}  // 0b0101, 0b0111, 0b1101, 0b1111
+  b represents: {10, 11, 14, 15} // 0b1010, 0b1011, 0b1110, 0b1111
+
+Both sets contain 15, so they do overlap. Can this cause incorrect
+verifier behavior when is_branch_taken() calls tnum_overlap() to
+determine if JEQ/JNE branches are reachable?
+
+>  	mu = ~a.mask & ~b.mask;
+>  	return (a.value & mu) == (b.value & mu);
+>  }
+
+
+```
+
+---
+AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
+See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
+
+In-Reply-To-Subject: `bpf: Fix tnum_overlap to check for zero mask intersection`
+CI run summary: https://github.com/kernel-patches/bpf/actions/runs/18880108453
diff --git a/tests/data/test_ai_review_comment_preprocessor/expected-email-body.txt b/tests/data/test_ai_review_comment_preprocessor/expected-email-body.txt
@@ -0,0 +1,39 @@
+> diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
+> index f8e70e9c3..1a75b7c9a 100644
+> --- a/kernel/bpf/tnum.c
+> +++ b/kernel/bpf/tnum.c
+> @@ -163,6 +163,8 @@ bool tnum_overlap(struct tnum a, struct tnum b)
+>  {
+>  	u64 mu;
+>
+> +	if (a.mask && b.mask && !(a.mask & b.mask))
+> +		return false;
+                              ^^^^^
+
+Does this logic correctly detect non-overlapping tnums? Consider this
+counter-example:
+
+  a = (value=0x5, mask=0xA)  // binary: value=0101, mask=1010
+  b = (value=0xA, mask=0x5)  // binary: value=1010, mask=0101
+
+The masks are disjoint: (0xA & 0x5) == 0, so the new check returns
+false (no overlap).
+
+However, these tnums actually represent overlapping sets:
+  a represents: {5, 7, 13, 15}  // 0b0101, 0b0111, 0b1101, 0b1111
+  b represents: {10, 11, 14, 15} // 0b1010, 0b1011, 0b1110, 0b1111
+
+Both sets contain 15, so they do overlap. Can this cause incorrect
+verifier behavior when is_branch_taken() calls tnum_overlap() to
+determine if JEQ/JNE branches are reachable?
+
+>  	mu = ~a.mask & ~b.mask;
+>  	return (a.value & mu) == (b.value & mu);
+>  }
+
+
+---
+AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
+See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
+
+CI run summary: https://github.com/kernel-patches/bpf/actions/runs/18880108453
diff --git a/tests/test_branch_worker.py b/tests/test_branch_worker.py
@@ -1671,6 +1671,7 @@ async def test_forward_pr_comments(self, m: aioresponses) -> None:
                 commenter_allowlist=["test_user"],
                 recipient_denylist=[re.compile(".*@vger.kernel.org")],
                 recipient_allowlist=[],
+                body_preprocessor_func=None,
             )
         )
 
@@ -1724,3 +1725,76 @@ async def test_forward_pr_comments(self, m: aioresponses) -> None:
             )
             self.assertEqual(mock_comment.body, body)
             self.assertEqual(f"<{mbox_msgid}>", in_reply_to)
+
+
+class TestPRCommentBodyPreprocessor(unittest.TestCase):
+    def setUp(self) -> None:
+        patcher = patch("kernel_patches_daemon.github_connector.Github")
+        self._gh_mock = patcher.start()
+        self.addCleanup(patcher.stop)
+        self._bw = BranchWorkerMock()
+
+    def test_ai_review_comment_preprocessor(self):
+        comment_body = read_test_data_file(
+            "test_ai_review_comment_preprocessor/comment-body.md"
+        )
+        expected_email_body = read_test_data_file(
+            "test_ai_review_comment_preprocessor/expected-email-body.txt"
+        )
+        email_body = self._bw.ai_review_comment_preprocessor(comment_body)
+        print(email_body)
+        self.assertEqual(email_body, expected_email_body)
+
+    def test_pr_comment_body_to_email_body_with_no_preprocessor(self):
+        cfg = PRCommentsForwardingConfig(
+            enabled=True,
+            always_reply_to_author=False,
+            always_cc=[],
+            commenter_allowlist=[],
+            recipient_allowlist=[],
+            recipient_denylist=[],
+            body_preprocessor_func=None,
+        )
+        comment_body = read_test_data_file(
+            "test_ai_review_comment_preprocessor/comment-body.md"
+        )
+        email_body = self._bw.pr_comment_body_to_email_body(cfg, comment_body)
+        # Expect no change
+        self.assertEqual(email_body, comment_body)
+
+    def test_pr_comment_body_to_email_body_with_valid_preprocessor(self):
+        cfg = PRCommentsForwardingConfig(
+            enabled=True,
+            always_reply_to_author=False,
+            always_cc=[],
+            commenter_allowlist=[],
+            recipient_allowlist=[],
+            recipient_denylist=[],
+            body_preprocessor_func="ai_review_comment_preprocessor",
+        )
+        comment_body = read_test_data_file(
+            "test_ai_review_comment_preprocessor/comment-body.md"
+        )
+        expected_email_body = read_test_data_file(
+            "test_ai_review_comment_preprocessor/expected-email-body.txt"
+        )
+        email_body = self._bw.pr_comment_body_to_email_body(cfg, comment_body)
+        self.assertEqual(email_body, expected_email_body)
+
+    def test_pr_comment_body_to_email_body_with_invalid_preprocessor(self):
+        # Setup: create config with non-existent preprocessor function
+        cfg = PRCommentsForwardingConfig(
+            enabled=True,
+            always_reply_to_author=False,
+            always_cc=[],
+            commenter_allowlist=[],
+            recipient_allowlist=[],
+            recipient_denylist=[],
+            body_preprocessor_func="nonexistent_function",
+        )
+        comment_body = read_test_data_file(
+            "test_ai_review_comment_preprocessor/comment-body.md"
+        )
+        email_body = self._bw.pr_comment_body_to_email_body(cfg, comment_body)
+        # Expect no change
+        self.assertEqual(email_body, comment_body)
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -179,6 +179,7 @@ def test_valid(self) -> None:
                     commenter_allowlist=["kpd-bot[bot]"],
                     recipient_denylist=[re.compile(".*@vger.kernel.org")],
                     recipient_allowlist=[],
+                    body_preprocessor_func=None,
                 ),
             ),
             tag_to_branch_mapping={"tag": ["app_auth_key_path"]},

Original file line number	Diff line number	Diff line change
`@@ -68,4 +68,3 @@ project-excludes = [`
`68`	`68`	`"**/__pycache__",`
`69`	`69`	`"*/venv/*/",`
`70`	`70`	`]`
`71`		`-`