Skip to content

Commit 55a2b22

Browse files
committed
Add script to validate upstream references in PR branch commits
This script scans each commit in a PR branch for references to upstream Linux kernel commits and validates those references. For every upstream commit that is referenced, the script checks that the commit exists in mainline and reports whether other upstream commits name it in a Fixes: tag.

Usage: python3 check_kernel_commits.py --repo path --pr_branch branch_name --base_branch branch_name [--markdown] [--upstream-ref ref]

By default, the script formats its results for terminal display. Use the --markdown flag to format output for GitHub PR comments. The mainline reference defaults to origin/kernel-mainline and can be changed with --upstream-ref.
1 parent 8bf3d21 commit 55a2b22

File tree

1 file changed

+194
-0
lines changed

1 file changed

+194
-0
lines changed

check_kernel_commits.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import subprocess
5+
import re
6+
import sys
7+
import textwrap
8+
9+
def run_git(repo, args):
    """Run a git command in the given repository and return its output as a string.

    Args:
        repo: Path of the repository; passed to git via ``-C``.
        args: Sequence of git arguments, sub-command first.

    Returns:
        The command's standard output as text.

    Raises:
        RuntimeError: If git exits with a non-zero status. The message contains
            the arguments and whatever git wrote to stderr.
    """
    # Decode explicitly as UTF-8 with replacement rather than with the locale
    # codec (text=True): commit messages and author names are arbitrary bytes,
    # and a single undecodable byte would otherwise abort the whole run with a
    # UnicodeDecodeError.
    result = subprocess.run(
        ['git', '-C', repo, *args],
        capture_output=True,
        check=False,
        encoding='utf-8',
        errors='replace',
    )
    if result.returncode != 0:
        raise RuntimeError(f"Git command failed: {' '.join(args)}\n{result.stderr}")
    return result.stdout
15+
16+
def ref_exists(repo, ref):
    """Return True if the given ref resolves to a commit in the repository, False otherwise.

    Args:
        repo: Path of the repository to query.
        ref: Branch name, tag, or other revision expression to check.

    Returns:
        True when git can resolve ``ref`` to a commit object, False when the
        lookup fails for any reason.
    """
    # Peel with ^{commit}: a bare `rev-parse --verify` also succeeds for blobs,
    # trees and any well-formed full-length hex id, none of which can serve as
    # a branch tip for the rev-list / merge-base calls made later.
    try:
        run_git(repo, ['rev-parse', '--verify', '--quiet', f'{ref}^{{commit}}'])
    except RuntimeError:
        return False
    return True
23+
24+
def get_pr_commits(repo, pr_branch, base_branch):
    """List the SHAs of commits reachable from pr_branch but not from base_branch.

    The SHAs are returned in the order ``git rev-list`` prints them for the
    ``base_branch..pr_branch`` range; an empty range yields an empty list.
    """
    revision_range = f'{base_branch}..{pr_branch}'
    listing = run_git(repo, ['rev-list', revision_range])
    shas = listing.strip().splitlines()
    return shas
28+
29+
def get_commit_message(repo, sha):
    """Return the full message (git's ``%B`` format) of the commit named by sha."""
    log_args = ['log', '-n', '1', '--format=%B', sha]
    message = run_git(repo, log_args)
    return message
32+
33+
def get_short_hash_and_subject(repo, sha):
    """Return ``(abbreviated_hash, subject)`` for the commit named by sha."""
    # The two fields are emitted with a NUL byte (%x00) between them, so the
    # subject may contain any printable text without confusing the split.
    log_format = '--format=%h%x00%s'
    raw = run_git(repo, ['log', '-n', '1', log_format, sha])
    fields = raw.strip().split('\x00', 1)
    abbreviated, title = fields
    return abbreviated, title
38+
39+
def hash_exists_in_mainline(repo, upstream_ref, hash_):
    """
    Tell whether hash_ is an ancestor of upstream_ref, i.e. reachable from it.

    Any failure of the underlying ``git merge-base --is-ancestor`` call is
    reported as False.
    """
    ancestry_check = ['merge-base', '--is-ancestor', hash_, upstream_ref]
    try:
        run_git(repo, ancestry_check)
    except RuntimeError:
        # run_git raises on every non-zero exit status.
        return False
    else:
        return True
48+
49+
# Matches a "Fixes: <hash>" trailer line and captures the (possibly abbreviated) hash.
_FIXES_TAG_RE = re.compile(r'^\s*Fixes:\s*([0-9a-fA-F]{6,40})', re.IGNORECASE)

# Parsed Fixes: index per (repo, upstream_ref). Building it means walking the
# whole upstream history, so it is done once per run instead of once per lookup.
_FIXES_INDEX_CACHE = {}


def _parse_fixes_log(output):
    """Turn NUL-separated `git log` records into a list of (summary, fixed_hashes) pairs."""
    index = []
    for record in output.split('\x00'):
        lines = record.strip().splitlines()
        if not lines:
            continue
        # The header line is "<full hash> <short hash> <subject> (<author>)";
        # the summary shown to the user drops the full hash.
        summary = ' '.join(lines[0].split()[1:])
        tags = tuple(
            m.group(1).lower()
            for m in (_FIXES_TAG_RE.match(line) for line in lines[1:])
            if m
        )
        if tags:
            index.append((summary, tags))
    return index


def _load_fixes_index(repo, upstream_ref):
    """Return the cached Fixes: index for upstream_ref, building it on first use."""
    key = (repo, upstream_ref)
    if key not in _FIXES_INDEX_CACHE:
        output = run_git(repo, [
            'log', upstream_ref, '--grep', 'Fixes:', '-i', '--format=%H %h %s (%an)%x0a%B%x00'
        ])
        _FIXES_INDEX_CACHE[key] = _parse_fixes_log(output)
    return _FIXES_INDEX_CACHE[key]


def _tag_refers_to(tag, hash_):
    """Return True if the two lowercase hex strings agree over the length of the shorter one."""
    common = min(len(tag), len(hash_))
    return tag[:common] == hash_[:common]


def find_fixes_in_mainline(repo, upstream_ref, hash_):
    """
    Return the commits in upstream_ref whose message has a Fixes: tag naming hash_.

    A tag names hash_ when the tag's hash and hash_ are identical over the
    length of the shorter of the two, compared case-insensitively. Tags carry
    6 to 40 hex digits, so a tag is never accepted merely because it shares a
    short prefix with hash_ while its remaining digits differ.

    Args:
        repo: Path of the repository to query.
        upstream_ref: Ref whose history is searched for Fixes: tags.
        hash_: Full or abbreviated hash of the commit that may have been fixed.

    Returns:
        A newline-joined string with one "<short hash> <subject> (<author>)"
        line per fixing commit, each commit listed once, in `git log` order.
        The empty string when nothing matches.

    Raises:
        RuntimeError: If the underlying `git log` call fails.
    """
    wanted = hash_.lower()
    if not wanted:
        # An empty hash would compare equal to every tag.
        return ""
    results = []
    for summary, tags in _load_fixes_index(repo, upstream_ref):
        if summary in results:
            continue
        if any(_tag_refers_to(tag, wanted) for tag in tags):
            results.append(summary)
    return "\n".join(results)
85+
86+
def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''):
    """Fill text into lines of at most width columns and return them as one string.

    The first line starts with initial_indent and every following line with
    subsequent_indent. Words are kept whole: neither over-long words nor
    hyphenated words are split, so such a word may exceed width.
    """
    wrap_options = {
        'initial_indent': initial_indent,
        'subsequent_indent': subsequent_indent,
        'break_long_words': False,
        'break_on_hyphens': False,
    }
    return textwrap.fill(text, width=width, **wrap_options)
94+
95+
# A line of the form "commit <12-40 hex digits>" in a PR commit message marks an upstream reference.
_UPSTREAM_COMMIT_RE = re.compile(r'^commit\s+([0-9a-fA-F]{12,40})', re.MULTILINE)


def _parse_args():
    """Define and parse the command-line options of the checker."""
    parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.")
    parser.add_argument("--repo", help="Path to the git repo", required=True)
    parser.add_argument("--pr_branch", help="Name of the PR branch", required=True)
    parser.add_argument("--base_branch", help="Name of the base branch", required=True)
    parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments")
    parser.add_argument("--upstream-ref", default="origin/kernel-mainline", help="Reference to upstream mainline branch (default: origin/kernel-mainline)")
    return parser.parse_args()


def _exit_if_refs_missing(repo, named_refs):
    """Print an error for every (label, ref) pair that does not exist and exit with status 1 if any is missing."""
    missing_refs = [(refname, refval) for refname, refval in named_refs
                    if not ref_exists(repo, refval)]
    if not missing_refs:
        return
    for refname, refval in missing_refs:
        print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.")
    print("Please fetch or create the required references before running this script.")
    sys.exit(1)


def _report_missing_upstream(pr_commit_desc, short_uhash, markdown):
    """Return the output lines for a referenced upstream commit that is not in mainline."""
    if markdown:
        return [
            f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n"
            f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n"
        ]
    prefix = "[NOTFOUND] "
    header = (f"{prefix}PR commit {pr_commit_desc} references upstream commit "
              f"{short_uhash}, which does not exist in kernel-mainline.")
    return [
        # Continuation lines are indented to line up after the '[NOTFOUND] ' prefix.
        wrap_paragraph(header, width=80, initial_indent='',
                       subsequent_indent=' ' * len(prefix)),
        "",  # blank line
    ]


def _report_fixes(pr_commit_desc, short_uhash, fixes, markdown):
    """Return the output lines for a referenced upstream commit that later commits name in a Fixes: tag."""
    if markdown:
        fixes_block = " " + fixes.replace("\n", "\n ")
        return [
            f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n"
            f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n"
            f" Linux kernel:\n\n"
            f"```text\n{fixes_block}\n```\n"
        ]
    prefix = "[FIXES] "
    header = (f"{prefix}PR commit {pr_commit_desc} references upstream commit "
              f"{short_uhash}, which has Fixes tags:")
    lines = [
        # Continuation lines are indented to line up after the '[FIXES] ' prefix.
        wrap_paragraph(header, width=80, initial_indent='',
                       subsequent_indent=' ' * len(prefix)),
        "",  # blank line after 'Fixes tags:'
    ]
    lines.extend(' ' + line for line in fixes.splitlines())
    lines.append("")  # blank line
    return lines


def main():
    """Check every commit of the PR branch for upstream references and print a report.

    For each commit in ``base_branch..pr_branch`` (oldest first), every
    ``commit <hash>`` line in the message is treated as a reference to an
    upstream commit. A reference is reported when the commit is not reachable
    from the upstream ref, or when upstream commits name it in a ``Fixes:``
    tag. A hash repeated inside one commit message is checked and reported
    only once.

    Exits with status 1 when a required ref is missing and with status 0 when
    the PR branch has no commits of its own; otherwise returns normally after
    printing the report in plain text or, with ``--markdown``, as Markdown.
    """
    args = _parse_args()
    upstream_ref = args.upstream_ref

    # Validate that all required refs exist before continuing
    _exit_if_refs_missing(args.repo, [('upstream reference', upstream_ref),
                                      ('PR branch', args.pr_branch),
                                      ('base branch', args.base_branch)])

    pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch)
    if not pr_commits:
        if args.markdown:
            print("> ℹ️ **No commits found in PR branch that are not in base branch.**")
        else:
            print("No commits found in PR branch that are not in base branch.")
        sys.exit(0)

    out_lines = []
    for sha in reversed(pr_commits):  # oldest first
        short_hash, subject = get_short_hash_and_subject(args.repo, sha)
        pr_commit_desc = f"{short_hash} ({subject})"
        msg = get_commit_message(args.repo, sha)
        # dict.fromkeys drops repeated references while keeping first-seen
        # order, so one upstream commit is not reported twice for one PR commit.
        for uhash in dict.fromkeys(_UPSTREAM_COMMIT_RE.findall(msg)):
            short_uhash = uhash[:12]
            # Ensure the referenced commit in the PR actually exists in the upstream ref.
            if not hash_exists_in_mainline(args.repo, upstream_ref, uhash):
                out_lines.extend(_report_missing_upstream(pr_commit_desc, short_uhash, args.markdown))
                continue
            fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash)
            if fixes:
                out_lines.extend(_report_fixes(pr_commit_desc, short_uhash, fixes, args.markdown))

    # Every finding contributes at least one output line.
    any_findings = bool(out_lines)

    if any_findings:
        if args.markdown:
            print("## :mag: Upstream Linux Kernel Commit Check\n")
            print('\n'.join(out_lines))
            print("*This is an automated message from the kernel commit checker workflow.*")
        else:
            print('\n'.join(out_lines))
    else:
        if args.markdown:
            print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**")
        else:
            print("All referenced commits exist upstream and have no Fixes: tags.")
193+
# Run the checker only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)