From 9f83927dcd5ed9794686566e79828c9922e3f05f Mon Sep 17 00:00:00 2001 From: Mehdi ABAAKOUK Date: Wed, 8 Oct 2025 11:34:44 +0200 Subject: [PATCH] feat(scopes): fetch commits until we found the base sha When doing the git diff we need have all intermediates objects. This change fetches them. Change-Id: I1c012afce0f5ac23cf9a599d4f2c4787b4feba5d --- mergify_cli/ci/cli.py | 2 +- mergify_cli/ci/scopes/changed_files.py | 67 +++++++++++++-- mergify_cli/ci/scopes/cli.py | 15 +++- .../tests/ci/scopes/test_changed_files.py | 86 +++++++++++++++---- mergify_cli/tests/conftest.py | 6 ++ mergify_cli/tests/utils.py | 47 ++++++++++ 6 files changed, 193 insertions(+), 30 deletions(-) diff --git a/mergify_cli/ci/cli.py b/mergify_cli/ci/cli.py index 4cd6cf53..95847c8a 100644 --- a/mergify_cli/ci/cli.py +++ b/mergify_cli/ci/cli.py @@ -207,5 +207,5 @@ def scopes( ) -> None: try: scopes_cli.detect(config_path=config_path) - except scopes_cli.ConfigInvalidError as e: + except scopes_cli.ScopesError as e: raise click.ClickException(str(e)) from e diff --git a/mergify_cli/ci/scopes/changed_files.py b/mergify_cli/ci/scopes/changed_files.py index b4a2e56d..a402994d 100644 --- a/mergify_cli/ci/scopes/changed_files.py +++ b/mergify_cli/ci/scopes/changed_files.py @@ -1,23 +1,74 @@ from __future__ import annotations import subprocess +import sys -import click + +COMMITS_BATCH_SIZE = 100 + + +class ChangedFilesError(Exception): + pass def _run(cmd: list[str]) -> str: + return subprocess.check_output(cmd, text=True, encoding="utf-8").strip() + + +def has_merge_base(base: str, head: str) -> bool: try: - return subprocess.check_output(cmd, text=True, encoding="utf-8").strip() - except subprocess.CalledProcessError as e: - msg = f"Command failed: {' '.join(cmd)}\n{e}" - raise click.ClickException(msg) from e + _run(["git", "merge-base", base, head]) + except subprocess.CalledProcessError: + return False + return True + + +def get_commits_count() -> int: + return int(_run(["git", "rev-list", "--count", "--all"])) + + +def ensure_git_history(base: str, head: str) -> None: + fetch_depth = COMMITS_BATCH_SIZE + + if not has_merge_base(base, head): + _run( + [ + "git", + "fetch", + "--no-tags", + f"--depth={fetch_depth}", + "origin", + base, + head, + ], + ) + + last_commits_count = get_commits_count() + while not has_merge_base(base, head): + fetch_depth = min(fetch_depth * 2, sys.maxsize) + _run(["git", "fetch", f"--deepen={fetch_depth}", "origin", base, "HEAD"]) + commits_count = get_commits_count() + if commits_count == last_commits_count: + if not has_merge_base(base, head): + msg = f"Cannot find a common ancestor between {base} and {head}" + raise ChangedFilesError(msg) + + break + last_commits_count = commits_count def git_changed_files(base: str) -> list[str]: + head = "HEAD" + ensure_git_history(base, head) # Committed changes only between base_sha and HEAD. # Includes: Added (A), Copied (C), Modified (M), Renamed (R), Type-changed (T), Deleted (D) # Excludes: Unmerged (U), Unknown (X), Broken (B) - out = _run( - ["git", "diff", "--name-only", "--diff-filter=ACMRTD", f"{base}...HEAD"], - ) + try: + out = _run( + ["git", "diff", "--name-only", "--diff-filter=ACMRTD", f"{base}...{head}"], + ) + except subprocess.CalledProcessError as e: + msg = f"Command failed: {' '.join(e.cmd)}\n{e}" + raise ChangedFilesError(msg) + return [line for line in out.splitlines() if line] diff --git a/mergify_cli/ci/scopes/cli.py b/mergify_cli/ci/scopes/cli.py index a5e1bfa5..f05d58e2 100644 --- a/mergify_cli/ci/scopes/cli.py +++ b/mergify_cli/ci/scopes/cli.py @@ -20,7 +20,15 @@ SCOPE_NAME_RE = r"^[A-Za-z0-9_-]+$" -class ConfigInvalidError(Exception): +class ScopesError(Exception): + pass + + +class ConfigInvalidError(ScopesError): + pass + + +class ChangedFilesError(ScopesError): pass @@ -104,7 +112,10 @@ def maybe_write_github_outputs( def detect(config_path: str) -> None: cfg = Config.from_yaml(config_path) base = base_detector.detect() - changed = changed_files.git_changed_files(base) + try: + changed = changed_files.git_changed_files(base) + except changed_files.ChangedFilesError as e: + raise ChangedFilesError(str(e)) scopes_hit, per_scope = match_scopes(cfg, changed) click.echo(f"Base: {base}") diff --git a/mergify_cli/tests/ci/scopes/test_changed_files.py b/mergify_cli/tests/ci/scopes/test_changed_files.py index 59317dca..e4b3c352 100644 --- a/mergify_cli/tests/ci/scopes/test_changed_files.py +++ b/mergify_cli/tests/ci/scopes/test_changed_files.py @@ -1,38 +1,86 @@ -import subprocess -from unittest import mock - -import click import pytest from mergify_cli.ci.scopes import changed_files +from mergify_cli.tests import utils as test_utils -@mock.patch("mergify_cli.ci.scopes.changed_files.subprocess.check_output") -def test_git_changed_files(mock_subprocess: mock.Mock) -> None: - mock_subprocess.return_value = "file1.py\nfile2.js\n" +def test_git_changed_files(mock_subprocess: test_utils.SubprocessMocks) -> None: + mock_subprocess.register(["git", "merge-base", "main", "HEAD"]) + mock_subprocess.register(["git", "rev-list", "--count", "--all"], "100") + mock_subprocess.register(["git", "merge-base", "main", "HEAD"]) + mock_subprocess.register( + ["git", "diff", "--name-only", "--diff-filter=ACMRTD", "main...HEAD"], + "file1.py\nfile2.js\n", + ) result = changed_files.git_changed_files("main") - mock_subprocess.assert_called_once_with( - ["git", "diff", "--name-only", "--diff-filter=ACMRTD", "main...HEAD"], - text=True, - encoding="utf-8", + assert result == ["file1.py", "file2.js"] + + +def test_git_changed_files_fetch_alot_of_history( + mock_subprocess: test_utils.SubprocessMocks, +) -> None: + sha = "b3deb84c4befe1918995b18eb06fa05f9074636d" + + mock_subprocess.register( + ["git", "merge-base", sha, "HEAD"], + "No such git object", + 1, ) + mock_subprocess.register( + ["git", "fetch", "--no-tags", "--depth=100", "origin", sha, "HEAD"], + ) + mock_subprocess.register(["git", "rev-list", "--count", "--all"], "100") + + # Loop until we find it + for count in (200, 400, 800, 1600): + mock_subprocess.register( + ["git", "merge-base", sha, "HEAD"], + "No such git object", + 1, + ) + mock_subprocess.register( + ["git", "fetch", f"--deepen={count}", "origin", sha, "HEAD"], + ) + mock_subprocess.register(["git", "rev-list", "--count", "--all"], f"{count}") + + # We found it! + mock_subprocess.register(["git", "merge-base", sha, "HEAD"]) + + mock_subprocess.register( + ["git", "diff", "--name-only", "--diff-filter=ACMRTD", f"{sha}...HEAD"], + "file1.py\nfile2.js\n", + ) + + result = changed_files.git_changed_files(sha) + assert result == ["file1.py", "file2.js"] -@mock.patch("mergify_cli.ci.scopes.changed_files.subprocess.check_output") -def test_git_changed_files_empty(mock_subprocess: mock.Mock) -> None: - mock_subprocess.return_value = "" +def test_git_changed_files_empty(mock_subprocess: test_utils.SubprocessMocks) -> None: + mock_subprocess.register(["git", "merge-base", "main", "HEAD"]) + mock_subprocess.register(["git", "rev-list", "--count", "--all"], "100") + mock_subprocess.register(["git", "merge-base", "main", "HEAD"]) + mock_subprocess.register( + ["git", "diff", "--name-only", "--diff-filter=ACMRTD", "main...HEAD"], + "", + ) result = changed_files.git_changed_files("main") assert result == [] -@mock.patch("mergify_cli.ci.scopes.changed_files.subprocess.check_output") -def test_run_command_failure(mock_subprocess: mock.Mock) -> None: - mock_subprocess.side_effect = subprocess.CalledProcessError(1, ["git", "diff"]) +def test_run_command_failure(mock_subprocess: test_utils.SubprocessMocks) -> None: + mock_subprocess.register(["git", "merge-base", "main", "HEAD"]) + mock_subprocess.register(["git", "rev-list", "--count", "--all"], "100") + mock_subprocess.register(["git", "merge-base", "main", "HEAD"]) + mock_subprocess.register( + ["git", "diff", "--name-only", "--diff-filter=ACMRTD", "main...HEAD"], + "No such git object", + 1, + ) - with pytest.raises(click.ClickException, match="Command failed"): - changed_files._run(["git", "diff"]) + with pytest.raises(changed_files.ChangedFilesError, match="Command failed"): + changed_files.git_changed_files("main") diff --git a/mergify_cli/tests/conftest.py b/mergify_cli/tests/conftest.py index 5a0fdb7f..a0cfe161 100644 --- a/mergify_cli/tests/conftest.py +++ b/mergify_cli/tests/conftest.py @@ -12,6 +12,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. +from collections import abc from collections.abc import Generator import pathlib import subprocess @@ -77,3 +78,8 @@ def git_mock( with mock.patch("mergify_cli.utils.git", git_mock_object): yield git_mock_object + + +@pytest.fixture +def mock_subprocess() -> abc.Generator[test_utils.SubprocessMocks]: + yield from test_utils.subprocess_mocked() diff --git a/mergify_cli/tests/utils.py b/mergify_cli/tests/utils.py index 023a43f5..62cdca4e 100644 --- a/mergify_cli/tests/utils.py +++ b/mergify_cli/tests/utils.py @@ -12,8 +12,11 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. +from collections import abc import dataclasses +import subprocess import typing +from unittest import mock class Commit(typing.TypedDict): @@ -86,3 +89,47 @@ def commit(self, commit: Commit) -> None: f"mergify-cli-tmp:current-branch/{commit['change_id']}", output="", ) + + +@dataclasses.dataclass +class SubprocessMock: + cmd: list[str] + output: str = "" + exit_code: int = 0 + + +@dataclasses.dataclass +class SubprocessMocks: + calls: list[SubprocessMock] = dataclasses.field(default_factory=list) + + def register(self, cmd: list[str], output: str = "", exit_code: int = 0) -> None: + self.calls.append(SubprocessMock(cmd=cmd, output=output, exit_code=exit_code)) + + +def subprocess_mocked() -> abc.Generator[SubprocessMocks]: + mocks = SubprocessMocks() + + with mock.patch("subprocess.check_output") as mock_run: + + def check_output( + cmd: list[str], + **kwargs: typing.Any, # noqa: ARG001 ANN401 + ) -> str: + try: + m = mocks.calls.pop(0) + except IndexError: + msg = f"Unexpected command: {cmd}" + raise ValueError(msg) + if m.cmd == cmd: + if m.exit_code != 0: + raise subprocess.CalledProcessError( + m.exit_code, + cmd, + output=m.output, + ) + return m.output + msg = f"Unexpected command: {m.cmd} != {cmd}" + raise ValueError(msg) + + mock_run.side_effect = check_output + yield mocks