From e28008db5a2552456621ac77ba77c2d458c88870 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Fri, 5 Sep 2025 16:19:16 -0700 Subject: [PATCH 1/6] Don't rely on subject name when determinig PR for the series The fix in series to git branch mapping 88cb274 made _guess_pr() to work incorrectly due to a wrong assumption there. _guess_pr() looks up a known open PR *by subject name*, which may not necessarily be the right PR for a given series. Before 88cb274 the branch name was (incorrectly) based on the oldest series id. However currently open PRs still use the wrong branch name. _guess_pr() then returns a wrong cached PR/branch. This leads to an exception in apply_push_comment(), because it tries to verify that PR was successfully updated, but looks at a wrong PR. Fix this by not using lookup in local cache by name in guess_pr(). Differential Revision: D79654888 Reviewed-by: Jordan Rome Signed-off-by: Ihor Solodrai --- kernel_patches_daemon/branch_worker.py | 4 ---- tests/test_branch_worker.py | 10 ---------- 2 files changed, 14 deletions(-) diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py index ec5c7b5..dc278b3 100644 --- a/kernel_patches_daemon/branch_worker.py +++ b/kernel_patches_daemon/branch_worker.py @@ -767,10 +767,6 @@ async def _guess_pr( - try to guess based on first series """ - # try to find amond known relevant PRs - if series.subject in self.prs: - return self.prs[series.subject] - if not branch: # resolve branch: series -> subject -> branch subject = Subject(series.subject, self.patchwork) diff --git a/tests/test_branch_worker.py b/tests/test_branch_worker.py index 1b1ef86..08b9fd4 100644 --- a/tests/test_branch_worker.py +++ b/tests/test_branch_worker.py @@ -633,16 +633,6 @@ def test_delete_branches(self) -> None: ggr.assert_called_once_with(f"heads/{branch_deleted}") ggr.return_value.delete.assert_called_once() - @aioresponses() - async def test_guess_pr_return_from_active_pr_cache(self, m: aioresponses) -> None: - # Whatever is in our self.prs's cache dictionary will be returned. - series = Series(self._pw, SERIES_DATA) - sentinel = random.random() - # pyre-fixme[6]: For 2nd argument expected `PullRequest` but got `float`. - self._bw.prs["foo"] = sentinel - pr = await self._bw._guess_pr(series) - self.assertEqual(sentinel, pr) - async def test_guess_pr_return_from_secondary_cache_with_specified_branch( self, ) -> None: From 04a966b7a5aa004b0dce7a33b6f1373cf6fef5e5 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Fri, 5 Sep 2025 16:27:41 -0700 Subject: [PATCH 2/6] Fix base branch key in guess_pr() Fix a dormant bug in guess_pr(): wrong base branch key has been used for lookup of synced PRs. Apparently this code path almost never executed until e28008d, which removed another level of "cache" with lookup by name. Differential Revision: D79693152 Reviewed-by: Jordan Rome Signed-off-by: Ihor Solodrai --- kernel_patches_daemon/branch_worker.py | 2 +- tests/test_branch_worker.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py index dc278b3..4d9f4ae 100644 --- a/kernel_patches_daemon/branch_worker.py +++ b/kernel_patches_daemon/branch_worker.py @@ -774,7 +774,7 @@ async def _guess_pr( try: # we assuming only one PR can be active for one head->base - return self.all_prs[branch][self.repo_branch][0] + return self.all_prs[branch][self.repo_pr_base_branch][0] except (KeyError, IndexError): pass diff --git a/tests/test_branch_worker.py b/tests/test_branch_worker.py index 08b9fd4..473c686 100644 --- a/tests/test_branch_worker.py +++ b/tests/test_branch_worker.py @@ -644,7 +644,7 @@ async def test_guess_pr_return_from_secondary_cache_with_specified_branch( series = Series(self._pw, SERIES_DATA) sentinel = random.random() self._bw.all_prs[mybranch] = {} - self._bw.all_prs[mybranch][TEST_REPO_BRANCH] = [sentinel] + self._bw.all_prs[mybranch][TEST_REPO_PR_BASE_BRANCH] = [sentinel] pr = await self._bw._guess_pr(series, mybranch) self.assertEqual(sentinel, pr) @@ -661,7 +661,7 @@ async def test_guess_pr_return_from_secondary_cache_without_specified_branch( sentinel = random.random() self._bw.all_prs[mybranch] = {} - self._bw.all_prs[mybranch][TEST_REPO_BRANCH] = [sentinel] + self._bw.all_prs[mybranch][TEST_REPO_PR_BASE_BRANCH] = [sentinel] pr = await self._bw._guess_pr(series, mybranch) self.assertEqual(sentinel, pr) From ab7bee784d0b4306beff2978f0938e2b091e5d43 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Fri, 5 Sep 2025 16:33:54 -0700 Subject: [PATCH 3/6] Fix PR matching on cleanup GithubSync.close_existing_prs_for_series() is supposed to close all *other* PRs associated with a series being processed. So far it only looked at the *branch* name (`series/=>`), when deciding whether two PRs match. This worked due to a bug, fixed in 88cb274, which caused the oldest series id (branch name) to always be associated with the series name (PR title). Now that the bug is fixed, we have to match by name during clean up, which is what happens in this change. Differential Revision: D79763361 Reviewed-by: Amery Hung Signed-off-by: Ihor Solodrai --- kernel_patches_daemon/branch_worker.py | 6 ++++ kernel_patches_daemon/github_sync.py | 13 ++++++-- tests/test_branch_worker.py | 41 ++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py index 4d9f4ae..1237266 100644 --- a/kernel_patches_daemon/branch_worker.py +++ b/kernel_patches_daemon/branch_worker.py @@ -436,6 +436,12 @@ def same_series_different_target(ref: str, other_ref: str) -> bool: return ref1["series"] == ref2["series"] and ref1["target"] != ref2["target"] +def prs_for_the_same_series(pr1: PullRequest, pr2: PullRequest) -> bool: + return pr1.title == pr2.title or same_series_different_target( + pr1.head.ref, pr2.head.ref + ) + + def _reset_repo(repo, branch: str) -> None: """ Reset the repository into a known good state, with `branch` checked out. diff --git a/kernel_patches_daemon/github_sync.py b/kernel_patches_daemon/github_sync.py index 422fa3b..e4895fc 100644 --- a/kernel_patches_daemon/github_sync.py +++ b/kernel_patches_daemon/github_sync.py @@ -20,7 +20,7 @@ parse_pr_ref, parsed_pr_ref_ok, pr_has_label, - same_series_different_target, + prs_for_the_same_series, ) from kernel_patches_daemon.config import BranchConfig, KPDConfig from kernel_patches_daemon.github_logs import ( @@ -156,12 +156,19 @@ def close_existing_prs_for_series( the same series name, but different remote branch and close them. """ - prs_to_close = [ + existing_prs = [ existing_pr for worker in workers for existing_pr in worker.prs.values() - if same_series_different_target(pr.head.ref, existing_pr.head.ref) + if existing_pr.number != pr.number + ] + + prs_to_close = [ + existing_pr + for existing_pr in existing_prs + if prs_for_the_same_series(pr, existing_pr) ] + # Remove matching PRs from other workers for pr_to_close in prs_to_close: logging.info( diff --git a/tests/test_branch_worker.py b/tests/test_branch_worker.py index 473c686..a5b2bac 100644 --- a/tests/test_branch_worker.py +++ b/tests/test_branch_worker.py @@ -6,7 +6,6 @@ # pyre-unsafe -import os import random import re import shutil @@ -14,7 +13,7 @@ import unittest from dataclasses import dataclass, field from datetime import datetime -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from unittest.mock import MagicMock, patch import git @@ -35,6 +34,7 @@ EmailBodyContext, furnish_ci_email_body, parse_pr_ref, + prs_for_the_same_series, same_series_different_target, temporary_patch_file, UPSTREAM_REMOTE_NAME, @@ -892,6 +892,43 @@ def test_same_series_different_target(self) -> None: ) ) + def test_prs_for_the_same_series(self) -> None: + def create_mock_pr(title: Optional[str], head_ref: str) -> MagicMock: + pr = MagicMock() + pr.title = title + pr.head.ref = head_ref + return pr + + with self.subTest("same_title_different_series"): + # Title match should return True regardless of series + pr1 = create_mock_pr("Fix memory leak", "series/123=>main") + pr2 = create_mock_pr("Fix memory leak", "series/456=>bpf-next") + self.assertTrue(prs_for_the_same_series(pr1, pr2)) + + with self.subTest("different_title_same_series_different_target"): + # Same series, different target should return True regardless of title + pr1 = create_mock_pr("Fix memory leak", "series/123=>main") + pr2 = create_mock_pr("Different title", "series/123=>bpf-next") + self.assertTrue(prs_for_the_same_series(pr1, pr2)) + + with self.subTest("different_title_different_series"): + # No match + pr1 = create_mock_pr("Fix memory leak", "series/123=>main") + pr2 = create_mock_pr("Different title", "series/456=>main") + self.assertFalse(prs_for_the_same_series(pr1, pr2)) + + with self.subTest("none_vs_string_title"): + # None vs string title should not match + pr1 = create_mock_pr(None, "series/123=>main") + pr2 = create_mock_pr("Fix memory leak", "series/456=>bpf-next") + self.assertFalse(prs_for_the_same_series(pr1, pr2)) + + with self.subTest("malformed_refs_no_match"): + # Malformed refs with different titles should return False + pr1 = create_mock_pr("Fix memory leak", "invalid-ref-format") + pr2 = create_mock_pr("Different title", "also-invalid") + self.assertFalse(prs_for_the_same_series(pr1, pr2)) + class TestGitSeriesAlreadyApplied(unittest.IsolatedAsyncioTestCase): def setUp(self): From 8634d9520c10e2d084ffd2429fd38bc8f0a7f1c7 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Fri, 5 Sep 2025 16:46:51 -0700 Subject: [PATCH 4/6] Close PRs for which there is no latest_series on sync After recent fixes in subject-branch mapping, when syncing "old" subjects a situation may occur when there are no relevant series for an open PR. This caused the following error message: ``` E0807 21:21:26.207 branch_worker.py:863] BUG: Unable to find PR for selftests/bpf: Install test modules into $INSTALL_PATH https://patchwork.kernel.org/project/netdevbpf/list/?series=985731 ``` Fix it by explicitly checking for latest_series() and closing the PR if it doesn't exist. Differential Revision: D79864591 Reviewed-by: Amery Hung Signed-off-by: Ihor Solodrai --- kernel_patches_daemon/github_sync.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel_patches_daemon/github_sync.py b/kernel_patches_daemon/github_sync.py index e4895fc..c179995 100644 --- a/kernel_patches_daemon/github_sync.py +++ b/kernel_patches_daemon/github_sync.py @@ -336,21 +336,35 @@ async def sync_patches(self) -> None: parsed_ref = parse_pr_ref(pr.head.ref) # ignore unknown format branch/PRs. if not parsed_pr_ref_ok(parsed_ref): - logger.warning( + logger.info( f"Unexpected format of the branch name: {pr.head.ref}" ) continue + if parsed_ref["target"] != worker.repo_branch: + logger.info( + f"Skipping sync of PR {pr.number} ({pr.head.ref}) as it's not for {worker.repo_branch}" + ) + continue + series_id = parsed_ref["series_id"] series = await self.pw.get_series_by_id(series_id) subject = self.pw.get_subject_by_series(series) if subject_name != subject.subject: - logger.warning( + logger.info( f"Renaming {pr} from {subject_name} to {subject.subject} according to {series.id}" ) pr.edit(title=subject.subject) + + latest_series = await subject.latest_series() + if latest_series is None: + logger.warning( + f"Closing {pr} associated with irrelevent or outdated series {series_id}" + ) + pr.edit(state="close") + continue + branch_name = await worker.subject_to_branch(subject) - latest_series = await subject.latest_series() or series pr = await self.checkout_and_patch_safe( worker, branch_name, latest_series ) From d2edb97cc49666e3b9b314d9c0781caed4e43556 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Fri, 5 Sep 2025 16:46:55 -0700 Subject: [PATCH 5/6] pr.update() before processing PR labels while evaluating CI results There is an intermittent email spam issue in rc: KPD tries to update PR labels, say from ci-fail to ci-pass, and immediately updates them back, causing an email notification on every iteration. The logic in the code is reasonable: we remove a "not_label" first, if it's in current labels, and add "new_label" and send a notification if it's not in current labels. The only odd thing that seems to be correlated with this behavior is forward proxy errors, which suggests failures when changing the pr (which makes calls github API). I noticed that KPD is looking at 2-3s old PR object in branch_worker.evaluate_ci_result(). While generally this shouldn't be an issue, let's add an explicit pr.update() call to make sure up to date labels are tested. The hypothesis is that KPD misbehavior might be caused by failed (or implicitly retried by proxy??) pr.remove_from_labels() / pr.add_to_labels() calls. If this is so, then adding pr.update() would shift the failure/retry to a less disruptive GET operation, which will "fix" KPD even if the proxy errors are still there. Differential Revision: D79931497 Reviewed-by: Jordan Rome Signed-off-by: Ihor Solodrai --- kernel_patches_daemon/branch_worker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py index 1237266..2180143 100644 --- a/kernel_patches_daemon/branch_worker.py +++ b/kernel_patches_daemon/branch_worker.py @@ -1220,6 +1220,7 @@ async def evaluate_ci_result( new_label = StatusLabelSuffixes.FAIL.to_label(series.version) not_label = StatusLabelSuffixes.PASS.to_label(series.version) + pr.update() # make sure we are looking at the up to date labels labels = {label.name for label in pr.labels} # Always make sure to remove the unused label so that we eventually # converge on having only one pass/fail label for each version, come From 7bceac4c0d410f5e7ac461f1f68bd7584eb38a4d Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Fri, 5 Sep 2025 16:49:19 -0700 Subject: [PATCH 6/6] Reset github connections on every sync iteration The HTTP connections to GitHub are managed by pygithub library. It uses a connection pool, initialized when Github object is created (in KPD we have one per BranchWorker), and the connections are actively reused. KPD creates the BranchWorkers, and by extension the Github objects, only once at start up. However KPD runs the main sync_patches() routine once every two minutes, which means that actual underlying connections are very likely to become stale. Of course, there is an encapsulated logic in pygithub to re-establish the connections. However it appears that attempting to use a connection that was never explicitly closed, causes issues when interacting with ttlsproxy. In particular we often get AsyncSocketException, which appears to be correlated with KPD's misbehavior with respect to email notifications. Attempt to mitigate these problems by re-creating GithubSync object (which spawns BranchWorker-s) on every iteration of the main daemon loop (that is: every 2 mins). Differential Revision: D80043971 Reviewed-by: Song Liu Signed-off-by: Ihor Solodrai --- kernel_patches_daemon/daemon.py | 45 ++++++++++++++++++++++++--------- tests/test_daemon.py | 4 ++- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/kernel_patches_daemon/daemon.py b/kernel_patches_daemon/daemon.py index 73b82d0..868c330 100755 --- a/kernel_patches_daemon/daemon.py +++ b/kernel_patches_daemon/daemon.py @@ -30,28 +30,49 @@ def __init__( loop_delay: int = DEFAULT_LOOP_DELAY, ) -> None: self.project: str = kpd_config.patchwork.project - self.github_sync_worker = GithubSync( - kpd_config=kpd_config, labels_cfg=labels_cfg - ) + self.kpd_config = kpd_config + self.labels_cfg = labels_cfg self.loop_delay: Final[int] = loop_delay self.metrics_logger = metrics_logger + self.github_sync_worker: GithubSync = GithubSync( + kpd_config=self.kpd_config, labels_cfg=self.labels_cfg + ) + + def reset_github_sync(self) -> bool: + try: + self.github_sync_worker = GithubSync( + kpd_config=self.kpd_config, labels_cfg=self.labels_cfg + ) + return True + except Exception: + logger.exception( + "Unhandled exception while creating GithubSync object", + exc_info=True, + ) + return False async def submit_metrics(self) -> None: - if self.metrics_logger: - logger.info("Submitting run metrics into metrics logger") - try: - self.metrics_logger(self.project, self.github_sync_worker.stats) - except Exception: - logger.exception( - "Failed to submit run metrics into metrics logger", exc_info=True - ) - else: + if self.metrics_logger is None: logger.warn( "Not submitting run metrics because metrics logger is not configured" ) + return + try: + self.metrics_logger(self.project, self.github_sync_worker.stats) + logger.info("Submitted run metrics into metrics logger") + except Exception: + logger.exception( + "Failed to submit run metrics into metrics logger", exc_info=True + ) async def run(self) -> None: while True: + ok = self.reset_github_sync() + if not ok: + logger.error( + "Most likely something went wrong connecting to GitHub or Patchwork. Skipping this iteration without submitting metrics." + ) + continue try: await self.github_sync_worker.sync_patches() self.github_sync_worker.increment_counter("runs_successful") diff --git a/tests/test_daemon.py b/tests/test_daemon.py index 9994ad9..2feae4e 100644 --- a/tests/test_daemon.py +++ b/tests/test_daemon.py @@ -6,7 +6,7 @@ import unittest from typing import Any, Dict, List -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch from kernel_patches_daemon.config import KPDConfig from kernel_patches_daemon.daemon import KernelPatchesWorker @@ -68,6 +68,7 @@ def setUp(self) -> None: ) self.worker.github_sync_worker.sync_patches = AsyncMock() + self.worker.reset_github_sync = MagicMock(return_value=True) async def test_run_ok(self) -> None: with ( @@ -81,6 +82,7 @@ async def test_run_ok(self) -> None: gh_sync = self.worker.github_sync_worker gh_sync.sync_patches.assert_called_once() + self.worker.reset_github_sync.assert_called_once() self.assertEqual(len(LOGGED_METRICS), 1) stats = LOGGED_METRICS[0][self.worker.project] self.assertEqual(stats["runs_successful"], 1)