Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 178 additions & 5 deletions check_kernel_commits.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
import sys
import textwrap
import os

def run_git(repo, args):
"""Run a git command in the given repository and return its output as a string."""
Expand Down Expand Up @@ -50,14 +51,15 @@ def find_fixes_in_mainline(repo, pr_branch, upstream_ref, hash_):
"""
Return unique commits in upstream_ref that have Fixes: <N chars of hash_> in their message, case-insensitive.
Start from 12 chars and work down to 6, but do not include duplicates if already found at a longer length.
Returns a list of tuples: (full_hash, display_string)
"""
results = []
# Get all commits with 'Fixes:' in the message
output = run_git(repo, [
'log', upstream_ref, '--grep', 'Fixes:', '-i', '--format=%H %h %s (%an)%x0a%B%x00'
]).strip()
if not output:
return ""
return []
# Each commit is separated by a NUL character and a newline
commits = output.split('\x00\x0a')
# Prepare hash prefixes from 12 down to 6
Expand All @@ -78,11 +80,11 @@ def find_fixes_in_mainline(repo, pr_branch, upstream_ref, hash_):
for prefix in hash_prefixes:
if m.group(1).lower().startswith(prefix.lower()):
if not commit_exists_in_branch(repo, pr_branch, full_hash):
results.append(' '.join(header.split()[1:]))
results.append((full_hash, ' '.join(header.split()[1:])))
break
else:
continue
return "\n".join(results)
return results

def commit_exists_in_branch(repo, pr_branch, upstream_hash_):
"""
Expand All @@ -104,17 +106,75 @@ def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''):
break_on_hyphens=False)
return wrapper.fill(text)

def extract_cve_from_message(msg):
"""Extract CVE reference from commit message. Returns CVE ID or None.
Only matches 'cve CVE-2025-12345', ignores 'cve-bf' and 'cve-pre' variants."""
Copy link

Copilot AI Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The docstring example uses CVE-2025-12345 which appears to be a future year. Consider using a more realistic example like CVE-2024-12345 or CVE-2023-12345.

Suggested change
Only matches 'cve CVE-2025-12345', ignores 'cve-bf' and 'cve-pre' variants."""
Only matches 'cve CVE-2024-12345', ignores 'cve-bf' and 'cve-pre' variants."""

Copilot uses AI. Check for mistakes.

Copy link
Collaborator Author

@bmastbergen bmastbergen Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The future is now, Copilot! It's 2025!

match = re.search(r'(?<!\S)cve\s+(CVE-\d{4}-\d+)', msg, re.IGNORECASE)
Copy link

Copilot AI Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex pattern uses a negative lookbehind (?<!\S) but the docstring mentions it ignores 'cve-bf' and 'cve-pre' variants. The current pattern would still match 'cve CVE-2025-12345' in 'somecve CVE-2025-12345'. Consider using a word boundary, \bcve\s+, for clearer intent.

Suggested change
match = re.search(r'(?<!\S)cve\s+(CVE-\d{4}-\d+)', msg, re.IGNORECASE)
match = re.search(r'\bcve\s+(CVE-\d{4}-\d+)', msg, re.IGNORECASE)

Copilot uses AI. Check for mistakes.

if match:
return match.group(1).upper()
return None

def run_cve_search(vulns_repo, kernel_repo, query):
    """
    Invoke the ``scripts/cve_search`` helper shipped in the vulns repo.

    The helper is executed with ``query`` as its only argument and with the
    ``CVEKERNELTREE`` environment variable set to ``kernel_repo``; the rest of
    the current environment is passed through unchanged.

    Args:
        vulns_repo: Path to the checkout that contains ``scripts/cve_search``.
        kernel_repo: Value exported to the helper as ``CVEKERNELTREE``.
        query: Argument handed to the helper.

    Returns:
        A ``(success, output_message)`` tuple. ``success`` is True when the
        helper exits with status 0; ``output_message`` is the helper's stdout
        with surrounding whitespace stripped (stderr is captured but not
        returned).

    Raises:
        RuntimeError: if nothing exists at the expected helper path.
    """
    script = os.path.join(vulns_repo, 'scripts', 'cve_search')
    if not os.path.exists(script):
        raise RuntimeError(f"cve_search script not found at {script}")

    # Build a private environment so os.environ itself is never modified.
    child_env = {**os.environ, 'CVEKERNELTREE': kernel_repo}

    # check=False: a non-zero exit is reported through the returned flag
    # rather than by raising CalledProcessError.
    completed = subprocess.run(
        [script, query],
        env=child_env,
        capture_output=True,
        text=True,
        check=False,
    )

    succeeded = completed.returncode == 0
    return succeeded, completed.stdout.strip()

def main():
parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.")
parser.add_argument("--repo", help="Path to the git repo", required=True)
parser.add_argument("--pr_branch", help="Name of the PR branch", required=True)
parser.add_argument("--base_branch", help="Name of the base branch", required=True)
parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments")
parser.add_argument("--upstream-ref", default="origin/kernel-mainline", help="Reference to upstream mainline branch (default: origin/kernel-mainline)")
parser.add_argument("--check-cves", action='store_true', help="Check that CVE references in commit messages match upstream commit hashes")
parser.add_argument("--vulns-dir", default="../vulns", help="Path to the kernel vulnerabilities repo (default: ../vulns)")
args = parser.parse_args()

upstream_ref = args.upstream_ref

# Set up vulns repo path if CVE checking is enabled
vulns_repo = None
if args.check_cves:
vulns_repo = args.vulns_dir
vulns_repo_url = "https://git.kernel.org/pub/scm/linux/security/vulns.git"

if os.path.exists(vulns_repo):
# Repository exists, update it with git pull
try:
run_git(vulns_repo, ['pull'])
except RuntimeError as e:
print(f"WARNING: Failed to update vulns repo: {e}")
print("Continuing with existing repository...")
else:
# Repository doesn't exist, clone it
try:
result = subprocess.run(['git', 'clone', vulns_repo_url, vulns_repo],
text=True,
capture_output=True,
check=False)
if result.returncode != 0:
print(f"ERROR: Failed to clone vulns repo: {result.stderr}")
sys.exit(1)
except Exception as e:
print(f"ERROR: Failed to clone vulns repo: {e}")
sys.exit(1)

# Validate that all required refs exist before continuing
missing_refs = []
for refname, refval in [('upstream reference', upstream_ref),
Expand Down Expand Up @@ -168,8 +228,34 @@ def main():
fixes = find_fixes_in_mainline(args.repo, args.pr_branch, upstream_ref, uhash)
if fixes:
any_findings = True

# Check CVEs for bugfix commits if enabled
fix_cves = {}
if args.check_cves:
for fix_hash, fix_display in fixes:
try:
success, cve_output = run_cve_search(vulns_repo, args.repo, fix_hash)
if success:
# Parse the CVE from the result
match = re.search(r'(CVE-\d{4}-\d+)\s+is assigned to git id', cve_output)
if match:
bugfix_cve = match.group(1)
fix_cves[fix_hash] = bugfix_cve
except (RuntimeError, subprocess.SubprocessError):
# Log a warning instead of silently ignoring errors when checking bugfix CVEs
print(f"Warning: Failed to check CVE for bugfix commit {fix_hash}: {e}", file=sys.stderr)

# Build the fixes display text with CVE info
fixes_lines = []
for fix_hash, display_str in fixes:
if fix_hash in fix_cves:
fixes_lines.append(f"{display_str} ({fix_cves[fix_hash]})")
else:
fixes_lines.append(display_str)
fixes_text = "\n".join(fixes_lines)

if args.markdown:
fixes_block = " " + fixes.replace("\n", "\n ")
fixes_block = " " + fixes_text.replace("\n", "\n ")
out_lines.append(
f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n"
f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n"
Expand All @@ -185,10 +271,97 @@ def main():
subsequent_indent=' ' * len(prefix)) # spaces for '[FIXES] '
)
out_lines.append("") # blank line after 'Fixes tags:'
for line in fixes.splitlines():
for line in fixes_text.splitlines():
out_lines.append(' ' + line)
out_lines.append("") # blank line

# Check CVE if enabled
if args.check_cves:
cve_id = extract_cve_from_message(msg)

# Check if the upstream commit has a CVE associated with it
try:
success, cve_output = run_cve_search(vulns_repo, args.repo, uhash)
if success:
# Parse the output to get the CVE from the result
# Expected format: "CVE-2024-35962 is assigned to git id 65acf6e0501ac8880a4f73980d01b5d27648b956"
match = re.search(r'(CVE-\d{4}-\d+)\s+is assigned to git id', cve_output)
if match:
found_cve = match.group(1)

if cve_id:
# PR commit has a CVE reference - check if it matches
if found_cve != cve_id:
any_findings = True
if args.markdown:
out_lines.append(
f"- ❌ PR commit `{pr_commit_desc}` references `{cve_id}` but \n"
f" upstream commit `{short_uhash}` is associated with `{found_cve}`\n"
)
else:
prefix = "[CVE-MISMATCH] "
header = (f"{prefix}PR commit {pr_commit_desc} references {cve_id} but "
f"upstream commit {short_uhash} is associated with {found_cve}")
out_lines.append(
wrap_paragraph(header, width=80, initial_indent='',
subsequent_indent=' ' * len(prefix))
)
out_lines.append("") # blank line
else:
# PR commit doesn't reference a CVE, but upstream has one
any_findings = True
if args.markdown:
out_lines.append(
f"- ⚠️ PR commit `{pr_commit_desc}` does not reference a CVE but \n"
f" upstream commit `{short_uhash}` is associated with `{found_cve}`\n"
)
else:
prefix = "[CVE-MISSING] "
header = (f"{prefix}PR commit {pr_commit_desc} does not reference a CVE but "
f"upstream commit {short_uhash} is associated with {found_cve}")
out_lines.append(
wrap_paragraph(header, width=80, initial_indent='',
subsequent_indent=' ' * len(prefix))
)
out_lines.append("") # blank line
else:
# The upstream commit has no CVE assigned
if cve_id:
# PR commit claims a CVE but upstream has none
any_findings = True
if args.markdown:
out_lines.append(
f"- ❌ PR commit `{pr_commit_desc}` references `{cve_id}` but \n"
f" upstream commit `{short_uhash}` has no CVE assigned\n"
)
else:
prefix = "[CVE-NOTFOUND] "
header = (f"{prefix}PR commit {pr_commit_desc} references {cve_id} but "
f"upstream commit {short_uhash} has no CVE assigned")
out_lines.append(
wrap_paragraph(header, width=80, initial_indent='',
subsequent_indent=' ' * len(prefix))
)
out_lines.append("") # blank line
except (subprocess.SubprocessError, OSError) as e:
# Error running cve_search
if cve_id:
any_findings = True
if args.markdown:
out_lines.append(
f"- ⚠️ PR commit `{pr_commit_desc}` references `{cve_id}` but \n"
f" failed to verify: {str(e)}\n"
)
else:
prefix = "[CVE-ERROR] "
header = (f"{prefix}PR commit {pr_commit_desc} references {cve_id} but "
f"failed to verify: {str(e)}")
out_lines.append(
wrap_paragraph(header, width=80, initial_indent='',
subsequent_indent=' ' * len(prefix))
)
out_lines.append("") # blank line

if any_findings:
if args.markdown:
print("## :mag: Upstream Linux Kernel Commit Check\n")
Expand Down