Skip to content

Commit 52858eb

Browse files
authored
feat: more accurate head detection (#843)
We currently uses HEAD to the tip of the PR branch. When it's a PR, the HEAD is a merge_commit, this merge commits may contains file changes from main, but not from the PR. This change adjusts the code to use head.sha instead. This change the changed_files algo more accurate.
1 parent 6c46c2b commit 52858eb

File tree

7 files changed

+135
-68
lines changed

7 files changed

+135
-68
lines changed

mergify_cli/ci/scopes/changed_files.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def ensure_git_history(base: str, head: str) -> None:
4848
last_commits_count = get_commits_count()
4949
while not has_merge_base(base, head):
5050
fetch_depth = min(fetch_depth * 2, sys.maxsize)
51-
_run(["git", "fetch", f"--deepen={fetch_depth}", "origin", base, "HEAD"])
51+
_run(["git", "fetch", f"--deepen={fetch_depth}", "origin", base, head])
5252
commits_count = get_commits_count()
5353
if commits_count == last_commits_count:
5454
if not has_merge_base(base, head):
@@ -59,8 +59,7 @@ def ensure_git_history(base: str, head: str) -> None:
5959
last_commits_count = commits_count
6060

6161

62-
def git_changed_files(base: str) -> list[str]:
63-
head = "HEAD"
62+
def git_changed_files(base: str, head: str) -> list[str]:
6463
ensure_git_history(base, head)
6564
# Committed changes only between base_sha and HEAD.
6665
# Includes: Added (A), Copied (C), Modified (M), Renamed (R), Type-changed (T), Deleted (D)

mergify_cli/ci/scopes/cli.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,18 @@
1010
import pydantic
1111

1212
from mergify_cli import utils
13-
from mergify_cli.ci.scopes import base_detector
1413
from mergify_cli.ci.scopes import changed_files
1514
from mergify_cli.ci.scopes import config
1615
from mergify_cli.ci.scopes import exceptions
16+
from mergify_cli.ci.scopes import git_refs_detector
1717

1818

1919
if typing.TYPE_CHECKING:
2020
from collections import abc
2121

2222
GITHUB_ACTIONS_SCOPES_OUTPUT_NAME = "scopes"
2323
GITHUB_ACTIONS_BASE_OUTPUT_NAME = "base"
24+
GITHUB_ACTIONS_HEAD_OUTPUT_NAME = "head"
2425

2526

2627
def match_scopes(
@@ -56,6 +57,7 @@ def match_scopes(
5657

5758
def maybe_write_github_outputs(
5859
base: str,
60+
head: str,
5961
all_scopes: abc.Iterable[str],
6062
scopes_hit: set[str],
6163
) -> None:
@@ -73,21 +75,23 @@ def maybe_write_github_outputs(
7375
key: "true" if key in scopes_hit else "false" for key in sorted(all_scopes)
7476
}
7577
fh.write(f"{GITHUB_ACTIONS_BASE_OUTPUT_NAME}={base}\n")
78+
fh.write(f"{GITHUB_ACTIONS_HEAD_OUTPUT_NAME}={head}\n")
7679
fh.write(
7780
f"{GITHUB_ACTIONS_SCOPES_OUTPUT_NAME}<<{delimiter}\n{json.dumps(data)}\n{delimiter}\n",
7881
)
7982

8083

8184
def maybe_write_github_step_summary(
8285
base: str,
86+
head: str,
8387
all_scopes: abc.Iterable[str],
8488
scopes_hit: set[str],
8589
) -> None:
8690
gha = os.environ.get("GITHUB_STEP_SUMMARY")
8791
if not gha:
8892
return
8993
# Build a pretty Markdown table with emojis
90-
markdown = f"## Mergify CI Scope Matching Results for `{base}...HEAD`\n\n"
94+
markdown = f"## Mergify CI Scope Matching Results for `{base}...{head}`\n\n"
9195
markdown += "| 🎯 Scope | ✅ Match |\n|:--|:--|\n"
9296
for scope in sorted(all_scopes):
9397
emoji = "✅" if scope in scopes_hit else "❌"
@@ -103,6 +107,7 @@ class InvalidDetectedScopeError(exceptions.ScopesError):
103107

104108
class DetectedScope(pydantic.BaseModel):
105109
base_ref: str
110+
head_ref: str
106111
scopes: set[str]
107112

108113
def save_to_file(self, file: str) -> None:
@@ -120,9 +125,10 @@ def load_from_file(cls, filename: str) -> DetectedScope:
120125

121126
def detect(config_path: str) -> DetectedScope:
122127
cfg = config.Config.from_yaml(config_path)
123-
base = base_detector.detect()
128+
git_refs = git_refs_detector.detect()
124129

125-
click.echo(f"Base: {base.ref}")
130+
click.echo(f"Base: {git_refs.base}")
131+
click.echo(f"Head: {git_refs.head}")
126132

127133
scopes_hit: set[str]
128134
per_scope: dict[str, list[str]]
@@ -133,7 +139,7 @@ def detect(config_path: str) -> DetectedScope:
133139
scopes_hit = set()
134140
per_scope = {}
135141
elif isinstance(source, config.SourceFiles):
136-
changed = changed_files.git_changed_files(base.ref)
142+
changed = changed_files.git_changed_files(git_refs.base, git_refs.head)
137143
click.echo("Changed files detected:")
138144
for file in changed:
139145
click.echo(f"- {file}")
@@ -148,7 +154,7 @@ def detect(config_path: str) -> DetectedScope:
148154

149155
if cfg.scopes.merge_queue_scope is not None:
150156
all_scopes.add(cfg.scopes.merge_queue_scope)
151-
if base.is_merge_queue:
157+
if git_refs.is_merge_queue:
152158
scopes_hit.add(cfg.scopes.merge_queue_scope)
153159

154160
if scopes_hit:
@@ -161,9 +167,18 @@ def detect(config_path: str) -> DetectedScope:
161167
else:
162168
click.echo("No scopes matched.")
163169

164-
maybe_write_github_outputs(base.ref, all_scopes, scopes_hit)
165-
maybe_write_github_step_summary(base.ref, all_scopes, scopes_hit)
166-
return DetectedScope(base_ref=base.ref, scopes=scopes_hit)
170+
maybe_write_github_outputs(git_refs.base, git_refs.head, all_scopes, scopes_hit)
171+
maybe_write_github_step_summary(
172+
git_refs.base,
173+
git_refs.head,
174+
all_scopes,
175+
scopes_hit,
176+
)
177+
return DetectedScope(
178+
base_ref=git_refs.base,
179+
head_ref=git_refs.head,
180+
scopes=scopes_hit,
181+
)
167182

168183

169184
async def send_scopes(

mergify_cli/ci/scopes/base_detector.py renamed to mergify_cli/ci/scopes/git_refs_detector.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,16 @@ def _detect_base_from_merge_queue_payload(ev: dict[str, typing.Any]) -> str | No
5959
return None
6060

6161

62+
def _detect_head_from_event(ev: dict[str, typing.Any]) -> str | None:
63+
pr = ev.get("pull_request")
64+
if isinstance(pr, dict):
65+
sha = pr.get("head", {}).get("sha")
66+
if isinstance(sha, str) and sha:
67+
return sha
68+
69+
return None
70+
71+
6272
def _detect_base_from_event(ev: dict[str, typing.Any]) -> str | None:
6373
pr = ev.get("pull_request")
6474
if isinstance(pr, dict):
@@ -77,6 +87,13 @@ def _detect_default_branch_from_event(ev: dict[str, typing.Any]) -> str | None:
7787
return None
7888

7989

90+
def _detect_head_from_push_event(ev: dict[str, typing.Any]) -> str | None:
91+
sha = ev.get("after")
92+
if isinstance(sha, str) and sha:
93+
return sha
94+
return None
95+
96+
8097
def _detect_base_from_push_event(ev: dict[str, typing.Any]) -> str | None:
8198
sha = ev.get("before")
8299
if isinstance(sha, str) and sha:
@@ -85,8 +102,9 @@ def _detect_base_from_push_event(ev: dict[str, typing.Any]) -> str | None:
85102

86103

87104
@dataclasses.dataclass
88-
class Base:
89-
ref: str
105+
class References:
106+
base: str
107+
head: str
90108
is_merge_queue: bool
91109

92110

@@ -98,37 +116,39 @@ class Base:
98116
}
99117

100118

101-
def detect() -> Base:
119+
def detect() -> References:
102120
try:
103121
event_name, event = utils.get_github_event()
104122
except utils.GitHubEventNotFoundError:
105123
# fallback to last commit
106-
return Base("HEAD^", is_merge_queue=False)
124+
return References("HEAD^", "HEAD", is_merge_queue=False)
107125
else:
108126
if event_name in PULL_REQUEST_EVENTS:
127+
head = _detect_head_from_event(event) or "HEAD"
109128
# 0) merge-queue PR override
110129
mq_sha = _detect_base_from_merge_queue_payload(event)
111130
if mq_sha:
112-
return Base(mq_sha, is_merge_queue=True)
131+
return References(mq_sha, head, is_merge_queue=True)
113132

114133
# 1) standard event payload
115134
event_sha = _detect_base_from_event(event)
116135
if event_sha:
117-
return Base(event_sha, is_merge_queue=False)
136+
return References(event_sha, head, is_merge_queue=False)
118137

119138
# 2) standard event payload
120139
event_sha = _detect_default_branch_from_event(event)
121140
if event_sha:
122-
return Base(event_sha, is_merge_queue=False)
141+
return References(event_sha, head, is_merge_queue=False)
123142

124143
elif event_name == "push":
125-
event_sha = _detect_base_from_push_event(event)
126-
if event_sha:
127-
return Base(event_sha, is_merge_queue=False)
144+
head_sha = _detect_head_from_push_event(event) or "HEAD"
145+
base_sha = _detect_base_from_push_event(event)
146+
if base_sha:
147+
return References(base_sha, head_sha, is_merge_queue=False)
128148

129149
event_sha = _detect_default_branch_from_event(event)
130150
if event_sha:
131-
return Base(event_sha, is_merge_queue=False)
151+
return References(event_sha, "HEAD", is_merge_queue=False)
132152
else:
133153
msg = "Unhandled GITHUB_EVENT_NAME"
134154
raise BaseNotFoundError(msg)

mergify_cli/tests/ci/scopes/test_changed_files.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,47 +20,48 @@ def test_git_changed_files(mock_subprocess: test_utils.SubprocessMocks) -> None:
2020
"file1.py\nfile2.js\n",
2121
)
2222

23-
result = changed_files.git_changed_files("main")
23+
result = changed_files.git_changed_files("main", "HEAD")
2424

2525
assert result == ["file1.py", "file2.js"]
2626

2727

2828
def test_git_changed_files_fetch_alot_of_history(
2929
mock_subprocess: test_utils.SubprocessMocks,
3030
) -> None:
31-
sha = "b3deb84c4befe1918995b18eb06fa05f9074636d"
31+
base = "b3deb84c4befe1918995b18eb06fa05f9074636d"
32+
head = "9b6d25af10e6285862eb2476106f266d2aa303cf"
3233

3334
mock_subprocess.register(
34-
["git", "merge-base", sha, "HEAD"],
35+
["git", "merge-base", base, head],
3536
"No such git object",
3637
1,
3738
)
3839
mock_subprocess.register(
39-
["git", "fetch", "--no-tags", "--depth=100", "origin", sha, "HEAD"],
40+
["git", "fetch", "--no-tags", "--depth=100", "origin", base, head],
4041
)
4142
mock_subprocess.register(["git", "rev-list", "--count", "--all"], "100")
4243

4344
# Loop until we find it
4445
for count in (200, 400, 800, 1600):
4546
mock_subprocess.register(
46-
["git", "merge-base", sha, "HEAD"],
47+
["git", "merge-base", base, head],
4748
"No such git object",
4849
1,
4950
)
5051
mock_subprocess.register(
51-
["git", "fetch", f"--deepen={count}", "origin", sha, "HEAD"],
52+
["git", "fetch", f"--deepen={count}", "origin", base, head],
5253
)
5354
mock_subprocess.register(["git", "rev-list", "--count", "--all"], f"{count}")
5455

5556
# We found it!
56-
mock_subprocess.register(["git", "merge-base", sha, "HEAD"])
57+
mock_subprocess.register(["git", "merge-base", base, head])
5758

5859
mock_subprocess.register(
59-
["git", "diff", "--name-only", "--diff-filter=ACMRTD", f"{sha}...HEAD"],
60+
["git", "diff", "--name-only", "--diff-filter=ACMRTD", f"{base}...{head}"],
6061
"file1.py\nfile2.js\n",
6162
)
6263

63-
result = changed_files.git_changed_files(sha)
64+
result = changed_files.git_changed_files(base, head)
6465

6566
assert result == ["file1.py", "file2.js"]
6667

@@ -74,7 +75,7 @@ def test_git_changed_files_empty(mock_subprocess: test_utils.SubprocessMocks) ->
7475
"",
7576
)
7677

77-
result = changed_files.git_changed_files("main")
78+
result = changed_files.git_changed_files("main", "HEAD")
7879

7980
assert result == []
8081

@@ -90,4 +91,4 @@ def test_run_command_failure(mock_subprocess: test_utils.SubprocessMocks) -> Non
9091
)
9192

9293
with pytest.raises(changed_files.ChangedFilesError, match="Command failed"):
93-
changed_files.git_changed_files("main")
94+
changed_files.git_changed_files("main", "HEAD")

0 commit comments

Comments
 (0)