From 32e314c7b4f3cec222721a4fa266f9bd2821a9f2 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 18 Jul 2025 09:56:30 -0700 Subject: [PATCH] docs: clarify linux_mirror usage Generated-by: ChatGPT Codex Signed-off-by: Codex --- README.md | 16 +++++++ configs/kpd.json | 4 +- kernel_patches_daemon/branch_worker.py | 23 +++++++++- kernel_patches_daemon/config.py | 4 ++ kernel_patches_daemon/github_connector.py | 6 +-- kernel_patches_daemon/github_sync.py | 2 + tests/test_branch_worker.py | 53 +++++++++++++++++++++++ tests/test_config.py | 11 +++++ tests/test_github_connector.py | 20 ++++++--- tests/test_github_sync.py | 14 ++++++ 10 files changed, 141 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 028bfff..36e82f7 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,22 @@ poetry install poetry run python -m unittest ``` +### Mirror setup + +To make more efficient use of network bandwidth, consider having a mirror of your target git tree +under /mirror/ or something like that, and set the "mirror_dir" configuration attribute to the +path where the mirrored git trees can be found. + +If your git tree is a Linux clone, set the "linux_mirror" flag to true. When the +exact repository name is missing from the mirror directory, for example when +{{ mirror_dir }}/linux-subsystem.git does not exist, kpd falls back to using +{{ mirror_dir }}/linux.git as reference. + +A reference target mirror path is only used if it exists. The mirror takes effect by leveraging +the git clone --reference option when cloning. Using this can save considerable bandwidth and +space, allowing kpd to run on thin guests in a corporate environment with, for example, an NFS +mount for local git trees on a network. 
+ ## Running ``` poetry run python -m kernel_patches_daemon --config --label-color configs/labels.json diff --git a/configs/kpd.json b/configs/kpd.json index 13fcb92..6de83c9 100644 --- a/configs/kpd.json +++ b/configs/kpd.json @@ -41,5 +41,7 @@ "github_oauth_token": "" } }, - "base_directory": "/tmp/repos" + "base_directory": "/tmp/repos", + "mirror_dir": "/mirror/", + "linux_mirror": true } diff --git a/kernel_patches_daemon/branch_worker.py b/kernel_patches_daemon/branch_worker.py index 80d801e..cbd387a 100644 --- a/kernel_patches_daemon/branch_worker.py +++ b/kernel_patches_daemon/branch_worker.py @@ -547,6 +547,8 @@ def __init__( app_auth: Optional[Auth.AppInstallationAuth] = None, email: Optional[EmailConfig] = None, http_retries: Optional[int] = None, + mirror_dir: Optional[str] = None, + linux_mirror: bool = False, ) -> None: super().__init__( repo_url=repo_url, @@ -559,6 +561,8 @@ def __init__( self.email = email self.log_extractor = log_extractor + self.mirror_dir = mirror_dir + self.linux_mirror = linux_mirror self.ci_repo_url = ci_repo_url self.ci_repo_dir = _uniq_tmp_folder(ci_repo_url, ci_branch, base_directory) self.ci_branch = ci_branch @@ -682,9 +686,26 @@ def do_sync(self) -> None: def full_sync(self, path: str, url: str, branch: str) -> git.Repo: logging.info(f"Doing full clone from {redact_url(url)}, branch: {branch}") + multi_opts: Optional[List[str]] = None + if self.mirror_dir: + upstream_name = os.path.basename(self.upstream_url) + reference_path = os.path.join(self.mirror_dir, upstream_name) + fallback = os.path.join(self.mirror_dir, "linux.git") + if ( + not os.path.exists(reference_path) + and self.linux_mirror + and os.path.exists(fallback) + ): + reference_path = fallback + if os.path.exists(reference_path): + multi_opts = ["--reference", reference_path] + with HistogramMetricTimer(git_clone_duration, {"branch": branch}): shutil.rmtree(path, ignore_errors=True) - repo = git.Repo.clone_from(url, path) + if multi_opts: + repo = 
git.Repo.clone_from(url, path, multi_options=multi_opts) + else: + repo = git.Repo.clone_from(url, path) _reset_repo(repo, f"origin/{branch}") git_clone_counter.add(1, {"branch": branch}) diff --git a/kernel_patches_daemon/config.py b/kernel_patches_daemon/config.py index b1f6bd6..3f75d43 100644 --- a/kernel_patches_daemon/config.py +++ b/kernel_patches_daemon/config.py @@ -171,6 +171,8 @@ class KPDConfig: branches: Dict[str, BranchConfig] tag_to_branch_mapping: Dict[str, List[str]] base_directory: str + mirror_dir: Optional[str] = None + linux_mirror: bool = False @classmethod def from_json(cls, json: Dict) -> "KPDConfig": @@ -203,6 +205,8 @@ def from_json(cls, json: Dict) -> "KPDConfig": for name, json_config in json["branches"].items() }, base_directory=json["base_directory"], + mirror_dir=json.get("mirror_dir"), + linux_mirror=json.get("linux_mirror", False), ) @classmethod diff --git a/kernel_patches_daemon/github_connector.py b/kernel_patches_daemon/github_connector.py index daff129..02899aa 100644 --- a/kernel_patches_daemon/github_connector.py +++ b/kernel_patches_daemon/github_connector.py @@ -89,11 +89,7 @@ def __init__( self.github_account_name = gh_user.login else: self.auth_type = AuthType.APP_AUTH - app = GithubIntegration( - auth=Auth.AppAuth( - app_id=app_auth.app_id, private_key=app_auth.private_key - ) - ).get_app() + app = GithubIntegration(auth=app_auth._app_auth).get_app() self.github_account_name = app.name # Github appends '[bot]' suffix to the NamedUser # >>> pull.user diff --git a/kernel_patches_daemon/github_sync.py b/kernel_patches_daemon/github_sync.py index 3dd6e09..c657dd2 100644 --- a/kernel_patches_daemon/github_sync.py +++ b/kernel_patches_daemon/github_sync.py @@ -114,6 +114,8 @@ def __init__( ci_branch=branch_config.ci_branch, log_extractor=_log_extractor_from_project(kpd_config.patchwork.project), base_directory=kpd_config.base_directory, + mirror_dir=kpd_config.mirror_dir, + linux_mirror=kpd_config.linux_mirror, 
http_retries=http_retries, github_oauth_token=branch_config.github_oauth_token, app_auth=github_app_auth_from_branch_config(branch_config), diff --git a/tests/test_branch_worker.py b/tests/test_branch_worker.py index f4c7396..3423263 100644 --- a/tests/test_branch_worker.py +++ b/tests/test_branch_worker.py @@ -68,6 +68,7 @@ TEST_CI_REPO_URL = f"https://user:pass@127.0.0.1/ci-org/{TEST_CI_REPO}" TEST_CI_BRANCH = "test_ci_branch" TEST_BASE_DIRECTORY = "/repos" +TEST_MIRROR_DIRECTORY = "/mirror" TEST_BRANCH = "test-branch" TEST_CONFIG: Dict[str, Any] = { "version": 2, @@ -124,6 +125,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: "ci_branch": TEST_CI_BRANCH, "log_extractor": DefaultGithubLogExtractor(), "base_directory": TEST_BASE_DIRECTORY, + "mirror_dir": None, + "linux_mirror": False, } presets.update(kwargs) @@ -464,6 +467,56 @@ def test_fetch_repo_path_exists_git_exception(self) -> None: self._bw.fetch_repo(*fetch_params) fr.assert_called_once_with(*fetch_params) + def test_full_sync_with_mirror_dir(self) -> None: + bw = BranchWorkerMock(mirror_dir=TEST_MIRROR_DIRECTORY) + reference = os.path.join( + TEST_MIRROR_DIRECTORY, os.path.basename(TEST_UPSTREAM_REPO_URL) + ) + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + exists.side_effect = lambda p: p == reference + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + multi_options=["--reference", reference], + ) + + def test_full_sync_with_linux_mirror_fallback(self) -> None: + bw = BranchWorkerMock(mirror_dir=TEST_MIRROR_DIRECTORY, linux_mirror=True) + fallback = os.path.join(TEST_MIRROR_DIRECTORY, "linux.git") + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + 
exists.side_effect = lambda p: p == fallback + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + multi_options=["--reference", fallback], + ) + + def test_full_sync_without_linux_mirror_fallback(self) -> None: + bw = BranchWorkerMock(mirror_dir=TEST_MIRROR_DIRECTORY, linux_mirror=False) + fallback = os.path.join(TEST_MIRROR_DIRECTORY, "linux.git") + with ( + patch("kernel_patches_daemon.branch_worker.os.path.exists") as exists, + patch("kernel_patches_daemon.branch_worker.shutil.rmtree") as rm, + ): + exists.side_effect = lambda p: p == fallback + bw.upstream_url = TEST_UPSTREAM_REPO_URL + bw.full_sync("somepath", "giturl", "branch") + # Without linux_mirror we should not use fallback + self._git_repo_mock.clone_from.assert_called_once_with( + "giturl", + "somepath", + ) + def test_expire_branches(self) -> None: """Only the branch that matches pattern and is expired should be deleted""" not_expired_time = datetime.fromtimestamp(3 * BRANCH_TTL) diff --git a/tests/test_config.py b/tests/test_config.py index 8d79fa3..4ed7725 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -208,5 +208,16 @@ def test_valid(self) -> None: ), }, base_directory="/repos", + mirror_dir=None, + linux_mirror=False, ) self.assertEqual(config, expected_config) + + def test_linux_mirror_enabled(self) -> None: + kpd_config_json = read_fixture("fixtures/kpd_config.json") + kpd_config_json["linux_mirror"] = True + + with patch("builtins.open", mock_open(read_data="TEST_KEY_FILE_CONTENT")): + config = KPDConfig.from_json(kpd_config_json) + + self.assertTrue(config.linux_mirror) diff --git a/tests/test_github_connector.py b/tests/test_github_connector.py index 8927c5f..774c2d4 100644 --- a/tests/test_github_connector.py +++ b/tests/test_github_connector.py @@ -247,7 +247,9 @@ def test_renew_expired_token(self) -> None: """ Verifies that `AppInstallationAuth.token` 
does renew an expired token. """ - now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE) + now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE).replace( + tzinfo=datetime.timezone.utc + ) expired_at_date = now + datetime.timedelta(hours=2) expired_at_next = expired_at_date + datetime.timedelta(hours=2) side_effect = [ @@ -282,7 +284,9 @@ def test_donot_renew_non_expired_token(self) -> None: Verifies that `Requester._refresh_token_if_needed` does not renew a token which is not expired yet. """ - now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE) + now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE).replace( + tzinfo=datetime.timezone.utc + ) expired_at_date = now + datetime.timedelta(hours=2) expired_at_next = expired_at_date + datetime.timedelta(hours=2) side_effect = [ @@ -318,7 +322,9 @@ def test_repo_url(self) -> None: reflect the current token, while when using oauth authentication, the repo_url stays the same as provided by the caller upon initialization. """ - now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE) + now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE).replace( + tzinfo=datetime.timezone.utc + ) expired_at_date = now + datetime.timedelta(hours=2) expired_at_next = expired_at_date + datetime.timedelta(hours=2) side_effect = [ @@ -369,7 +375,9 @@ def test_set_user_token_in_url_when_not_present(self) -> None: Verifies that when user:token is not initially present in the `repo_url`, the user:token from the gh app is inserted into the url's netloc. 
""" - now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE) + now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE).replace( + tzinfo=datetime.timezone.utc + ) expired_at_date = now + datetime.timedelta(hours=2) side_effect = [ munch.munchify({"token": "token1", "expires_at": expired_at_date}), @@ -423,7 +431,9 @@ class TestCase: ] for case in test_cases: with self.subTest(msg=case.name): - now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE) + now = datetime.datetime.fromisoformat(DEFAULT_FREEZE_DATE).replace( + tzinfo=datetime.timezone.utc + ) expired_at_date = now + datetime.timedelta(hours=2) side_effect = [ munch.munchify({"token": "token1", "expires_at": expired_at_date}), diff --git a/tests/test_github_sync.py b/tests/test_github_sync.py index 789dbf4..b73bd02 100644 --- a/tests/test_github_sync.py +++ b/tests/test_github_sync.py @@ -126,6 +126,20 @@ class TestCase: gh.workers[TEST_BRANCH].ci_repo_dir.startswith(case.prefix), ) + def test_init_with_mirror_dir(self) -> None: + config = copy.copy(TEST_CONFIG) + config["mirror_dir"] = "/mirror" + kpd_config = KPDConfig.from_json(config) + gh = GithubSyncMock(kpd_config=kpd_config) + self.assertEqual("/mirror", gh.workers[TEST_BRANCH].mirror_dir) + + def test_init_with_linux_mirror(self) -> None: + config = copy.copy(TEST_CONFIG) + config["linux_mirror"] = True + kpd_config = KPDConfig.from_json(config) + gh = GithubSyncMock(kpd_config=kpd_config) + self.assertTrue(gh.workers[TEST_BRANCH].linux_mirror) + def test_close_existing_prs_for_series(self) -> None: matching_pr_mock = MagicMock() matching_pr_mock.title = "matching"