Skip to content

Commit f99f917

Browse files
pratlucas and dcandler authored
[Cherry-pick] Add automated check for downstream changes (#401)
In order to follow the project's downstream patch policy (see CONTRIBUTING.md), modifications to the LLVM codebase (all files outside the arm-software sub-directory, with a few exceptions) need to have an issue opened in GitHub to track the changes, with the pull request linking to that issue. This patch implements a Python script and a GitHub workflow to run the script whenever a pull request is opened or modified. The script takes a pull request number, and uses the GitHub command-line interface to get information about which files are changed, and whether the body text of the pull request contains the required link. To determine which files need tracking, the file changes are compared against the automerge ignore list, which already contains the rules for which files are expected to be different. The body text of the pull request can then be searched for the expected tag format. Automating the check with a GitHub workflow should ensure downstream changes are properly tracked and that anyone making a downstream change is aware of the downstream patch policy. This currently does not check whether a pull request which states that it removes a downstream change actually removes that change completely. That will require additional scripting, to follow later. This cherry-picks the changes from #351 into the 20.x release branch. Co-authored-by: dcandler <[email protected]>
1 parent 9d03ad2 commit f99f917

File tree

2 files changed

+297
-0
lines changed

2 files changed

+297
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright (c) 2025, Arm Limited and affiliates.
# Part of the Arm Toolchain project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# This workflow runs the check_downstream_changes.py script in order to check
# whether a pull request makes a downstream change, and if so contains the
# appropriate tracking in the text. The script only requires the pull request
# number, and should be triggered whenever a pull request is edited or
# synchronized, since both the files changed and the text of the pull request
# itself are part of the check.
# The script requires the GitHub CLI to be available, and authenticated.

name: check-downstream-changes

on:
  # Trigger whenever a pull request is opened or changed.
  # Use pull_request_target since we don't want to checkout the version of
  # the script in the pull request, which may have been modified.
  pull_request_target:
    types:
      - opened
      - reopened
      - edited
      - synchronize
    branches:
      - arm-software
      - release/arm-software/**

jobs:
  check-downstream-changes:
    runs-on: ubuntu-24.04-arm

    if: github.repository == 'arm/arm-toolchain'

    steps:
      # Generate a token for gh tool
      - name: Configure Access Token
        uses: actions/create-github-app-token@v1
        id: generate-token
        with:
          app-id: ${{ secrets.SYNC_APP_ID }}
          private-key: ${{ secrets.SYNC_APP_PRIVATE_KEY }}

      - name: Checkout
        uses: actions/checkout@v4

      - name: Run Check Script
        run: python3 arm-software/ci/check_downstream_changes.py --repo ${{ github.repository }} --pr ${{ github.event.pull_request.number }}
        env:
          # The gh CLI invoked by the script authenticates with this token.
          GH_TOKEN: ${{ steps.generate-token.outputs.token }}
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (c) 2025, Arm Limited and affiliates.
4+
# Part of the Arm Toolchain project, under the Apache License v2.0 with LLVM Exceptions.
5+
# See https://llvm.org/LICENSE.txt for license information.
6+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
8+
"""
9+
A script to check that a pull request adheres to the downstream patch policy.
10+
If the pull request modifies file outside the arm-software build directory
11+
(or any other files excluded from automerge) then the pull request needs to
12+
contain specific text to link to a downstream tracking issue.
13+
14+
Requires the GitHub CLI tool (gh) to query the repo.
15+
"""
16+
17+
import argparse
18+
import json
19+
import logging
20+
import os
21+
import re
22+
import shlex
23+
import subprocess
24+
import sys
25+
from pathlib import Path
26+
27+
28+
# Emit INFO and above by default, each record prefixed with its level name.
# main() raises this module's logger to DEBUG when --verbose is passed.
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# The automerge ignore list is looked up next to this script.
MERGE_IGNORE_PATHSPEC_FILE = Path(__file__).with_name(".automerge_ignore")
32+
33+
34+
# Check gh is working before using it
def check_gh_status() -> None:
    """Verify that the GitHub CLI can be run and is authenticated.

    Runs ``gh auth status``. If the command exits with a non-zero status, or
    the ``gh`` executable cannot be launched at all, the failure is logged and
    the script exits with status 1.
    """
    args = ["gh", "auth", "status"]
    logger.debug(f"Running `{shlex.join(args)}`")
    try:
        subprocess.run(
            args,
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as error:
        logger.error(
            f"Check error. Failure checking gh\ncmd:{shlex.join(error.cmd)}\ncode:{error.returncode}\nstdout:{error.stdout}\nstderr:{error.stderr}"
        )
        sys.exit(1)
    except OSError as error:
        # subprocess.run raises OSError (typically FileNotFoundError) when gh
        # is not on PATH or is not executable. Report it like any other gh
        # failure instead of letting a traceback escape.
        logger.error(
            f"Check error. Failure checking gh\ncmd:{shlex.join(args)}\nerror:{error}"
        )
        sys.exit(1)
54+
55+
56+
# Use gh to get information about the pull request.
def get_pr_json(pr_num: str, repo: str) -> dict:
    """Fetch a pull request's body, changed files and title through gh.

    Args:
        pr_num: Pull request number, passed to ``gh pr view`` as given.
        repo: Repository passed to gh via ``--repo``.

    Returns:
        The decoded JSON printed by ``gh pr view --json body,files,title``.

    If gh exits with a non-zero status the failure is logged and the script
    exits with status 1.
    """
    command = ["gh", "pr", "view", pr_num]
    command += ["--repo", repo]
    command += ["--json", "body,files,title"]
    logger.debug(f"Running `{shlex.join(command)}`")

    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except subprocess.CalledProcessError as error:
        report = [
            "Check error. Failure querying pull request",
            f"cmd:{shlex.join(error.cmd)}",
            f"code:{error.returncode}",
            f"stdout:{error.stdout}",
            f"stderr:{error.stderr}",
        ]
        logger.error("\n".join(report))
        sys.exit(1)

    pr_info = json.loads(completed.stdout)
    pretty = json.dumps(pr_info, indent=4)
    logger.debug(f"Response from server for pull request #{pr_num}:\n{pretty}")
    return pr_info
77+
78+
79+
# Check that a value matches a valid issue.
def is_valid_issue_num(issue_num: str, repo: str) -> bool:
    """Report whether gh finds any issue in ``repo`` for ``issue_num``.

    Runs ``gh issue list --search <issue_num> --state all`` and treats a
    non-empty result list as a valid issue.

    Args:
        issue_num: Issue number to look for, as a string.
        repo: Repository passed to gh via ``--repo``.

    Returns:
        True if gh returned at least one result, False otherwise.

    If gh exits with a non-zero status the failure is logged and the script
    exits with status 1.
    """
    # NOTE(review): `--search` takes a search query rather than an exact issue
    # number, so this presumably also accepts issues that merely mention the
    # number - confirm whether a stricter lookup is wanted.
    command = [
        "gh", "issue", "list",
        "--search", issue_num,
        "--repo", repo,
        "--state", "all",
        "--json", "id",
    ]
    logger.debug(f"Running `{shlex.join(command)}`")

    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except subprocess.CalledProcessError as error:
        report = [
            "Check error. Failure querying issue",
            f"cmd:{shlex.join(error.cmd)}",
            f"code:{error.returncode}",
            f"stdout:{error.stdout}",
            f"stderr:{error.stderr}",
        ]
        logger.error("\n".join(report))
        sys.exit(1)

    results = json.loads(completed.stdout)
    pretty = json.dumps(results, indent=4)
    logger.debug(f"Response from server for issue {issue_num}:\n{pretty}")

    if len(results) == 0:
        logger.info(f"No issue found matching number #{issue_num}")
        return False
    logger.info(f"Issue found matching number #{issue_num}")
    return True
117+
118+
119+
# Test if a path is in the ignore list.
def is_path_ignored(test_path: str, ignored_paths: list[str]) -> bool:
    """Return True if ``test_path`` is covered by an entry in ``ignored_paths``.

    An entry covers a path when it names that path exactly or names one of
    its parent directories.

    Args:
        test_path: Path of the changed file being tested.
        ignored_paths: Lines of the ignore list, each a file or directory.

    Returns:
        True if any entry covers ``test_path``, False otherwise.
    """
    normalized_test = os.path.normpath(test_path)
    for ignored_path in ignored_paths:
        entry = ignored_path.strip()
        # A blank line must never match: commonpath() of "" and any relative
        # path is "", which would mark every file as ignored.
        if not entry:
            continue
        # Normalize so that "dir/" and "./dir" compare equal to "dir";
        # commonpath() never returns a trailing separator.
        normalized_entry = os.path.normpath(entry)
        try:
            common = os.path.commonpath([normalized_entry, normalized_test])
        except ValueError:
            # Raised when absolute and relative paths are mixed; such an entry
            # cannot cover the tested path.
            continue
        # The ignore list contains paths or directories.
        # Anything in an ignored subdirectory should also be ignored.
        if common == normalized_entry:
            logger.debug(f"{test_path} ignored by line {ignored_path}")
            return True
    return False
128+
129+
130+
# Test if a pull request contains a downstream change
def has_downstream_changes(input_json: dict) -> bool:
    """Report whether the pull request changes any file that needs tracking.

    Each entry of ``input_json["files"]`` is tested against the lines of the
    automerge ignore list; files not covered by that list count as downstream
    changes. Both groups of files are logged.

    Args:
        input_json: Pull request data with a ``files`` list whose entries
            each carry a ``path``.

    Returns:
        True if at least one changed file is not covered by the ignore list.
    """
    ignore_rules = MERGE_IGNORE_PATHSPEC_FILE.read_text().splitlines()

    skipped = []
    tracked = []
    for entry in input_json["files"]:
        path = entry["path"]
        bucket = skipped if is_path_ignored(path, ignore_rules) else tracked
        bucket.append(path)

    if skipped:
        skipped_text = "\n".join(skipped)
        logger.info(f"File modifications excluded by ignore list:\n{skipped_text}")

    if not tracked:
        logger.info("No modifications to files outside exclude list found.")
        return False

    tracked_text = "\n".join(tracked)
    logger.info(f"File modifications that require tracking:\n{tracked_text}")
    return True
151+
152+
153+
# Check if a pull request has been correctly tagged.
# Expected formatting per the policy (with leeway for spaces):
# Downstream issue:#123
# Downstream issue: #123
# Removes downstream issue:#123
# Removes downstream issue: #123
def find_pr_issue(input_json: dict) -> "str | None":
    """Extract the downstream tracking issue number from a pull request body.

    The tag must start at the beginning of a line and is matched
    case-insensitively.

    Args:
        input_json: Pull request data with the body text under ``body``.

    Returns:
        The issue number as a string of digits when exactly one tag is
        present; None when there is no tag or more than one.
    """
    # Treat a missing (None) body like an empty one so the search below
    # cannot fail with a TypeError.
    body = input_json["body"] or ""
    logger.debug("body text: %s", body)
    matches = re.findall(
        r"^((?:removes )?downstream issue: *#([0-9]+))",
        body,
        flags=re.I | re.M,
    )
    if not matches:
        logger.info("No downstream issue link found in pull request body.")
        return None
    # Each match is a (full tag text, issue number) pair.
    tag_list = "\n".join(tag for tag, _ in matches)
    logger.info(f"Found issue links:\n{tag_list}")
    # There should only be one match.
    if len(matches) > 1:
        logger.info(
            "Multiple downstream issue links found in pull request body. Only one is expected."
        )
        return None
    issue_num = matches[0][1]

    logger.info(f"Pull request text links to issue #{issue_num}")
    return issue_num
181+
182+
183+
def main():
    """Run the downstream patch policy check for one pull request.

    Parses ``--repo``, ``--pr`` and ``--verbose``, confirms gh is usable,
    fetches the pull request, and compares whether it makes downstream
    changes with whether its body links a tracking issue. Always ends via
    ``sys.exit``: status 0 when the check passes, 1 when it fails.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--repo",
        required=True,
        help="GitHub repo where the pull request can be found",
    )
    cli.add_argument(
        "--pr",
        required=True,
        help="The number of the pull request to be checked",
    )
    cli.add_argument(
        "--verbose",
        action="store_true",
        help="Print verbose log messages",
    )
    options = cli.parse_args()

    if options.verbose:
        logger.setLevel(logging.DEBUG)

    check_gh_status()

    pr_json = get_pr_json(options.pr, options.repo)
    pr = options.pr
    title = pr_json["title"]
    logger.info(f"Checking pull request #{pr}: '{title}'")
    needs_tagging = has_downstream_changes(pr_json)
    issue_num = find_pr_issue(pr_json)

    link_text = "Please check https://github.com/arm/arm-toolchain/blob/arm-software/CONTRIBUTING.md#downstream-patch-policy for information on the downstream patch policy and how changes need to be tracked."

    def finish(message, status):
        # Log the verdict and end the script with the matching exit status.
        logger.info(message)
        sys.exit(status)

    if not needs_tagging:
        # Without downstream changes there must be no tracking link either.
        if issue_num is not None:
            finish(
                f"Check failed. Pull request #{pr} contains no downstream changes, but links to a downstream tracking issue. {link_text}",
                1,
            )
        finish(
            f"Check passed. Pull request #{pr} contains no downstream changes, and does not link to a downstream tracking issue.",
            0,
        )

    if issue_num is None:
        finish(
            f"Check failed. Pull request #{pr} contains downstream changes, but does not have a correctly formatted link to a downstream tracking issue. {link_text}",
            1,
        )

    # The linked issue is only queried once a well-formed link was found.
    if not is_valid_issue_num(issue_num, options.repo):
        finish(
            f"Check failed. Pull request #{pr} contains downstream changes, but the link to the downstream tracking issue is not valid. {link_text}",
            1,
        )

    finish(
        f"Check passed. Pull request #{pr} contains downstream changes, and a correctly formatted link to a downstream tracking issue.",
        0,
    )
243+
244+
245+
if __name__ == "__main__":
246+
main()

0 commit comments

Comments
 (0)