Skip to content

Commit 819da3b

Browse files
committed
Convert change detection to a Python script
1 parent bb5c687 commit 819da3b

File tree

2 files changed

+242
-106
lines changed

2 files changed

+242
-106
lines changed

.github/workflows/reusable-change-detection.yml

Lines changed: 43 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -52,122 +52,59 @@ jobs:
5252
timeout-minutes: 10
5353
outputs:
5454
config-hash: ${{ steps.config-hash.outputs.hash }}
55-
run-cifuzz: ${{ steps.check.outputs.run-cifuzz }}
56-
run-docs: ${{ steps.docs-changes.outputs.run-docs }}
57-
run-hypothesis: ${{ steps.check.outputs.run-hypothesis }}
58-
run-tests: ${{ steps.check.outputs.run-tests }}
59-
run-win-msi: ${{ steps.win-msi-changes.outputs.run-win-msi }}
55+
run-cifuzz: ${{ steps.changes.outputs.run-cifuzz }}
56+
run-docs: ${{ steps.changes.outputs.run-docs }}
57+
run-hypothesis: ${{ steps.changes.outputs.run-hypothesis }}
58+
run-tests: ${{ steps.changes.outputs.run-tests }}
59+
run-win-msi: ${{ steps.changes.outputs.run-win-msi }}
6060
steps:
61+
- uses: actions/setup-python@v5
62+
with:
63+
python-version: "3"
64+
6165
- run: >-
6266
echo '${{ github.event_name }}'
67+
6368
- uses: actions/checkout@v4
6469
with:
6570
persist-credentials: false
66-
- name: Check for source changes
67-
id: check
71+
ref: >-
72+
${{
73+
github.event_name == 'pull_request'
74+
&& github.event.pull_request.head.sha
75+
|| ''
76+
}}
77+
78+
# Adapted from https://github.com/actions/checkout/issues/520#issuecomment-1167205721
79+
- name: Fetch commits to get branch diff
80+
if: github.event_name == 'pull_request'
6881
run: |
69-
if [ -z "$GITHUB_BASE_REF" ]; then
70-
echo "run-tests=true" >> "$GITHUB_OUTPUT"
71-
else
72-
git fetch origin "$GITHUB_BASE_REF" --depth=1
73-
# git diff "origin/$GITHUB_BASE_REF..." (3 dots) may be more
74-
# reliable than git diff "origin/$GITHUB_BASE_REF.." (2 dots),
75-
# but it requires to download more commits (this job uses
76-
# "git fetch --depth=1").
77-
#
78-
# git diff "origin/$GITHUB_BASE_REF..." (3 dots) works with Git
79-
# 2.26, but Git 2.28 is stricter and fails with "no merge base".
80-
#
81-
# git diff "origin/$GITHUB_BASE_REF.." (2 dots) should be enough on
82-
# GitHub, since GitHub starts by merging origin/$GITHUB_BASE_REF
83-
# into the PR branch anyway.
84-
#
85-
# https://github.com/python/core-workflow/issues/373
86-
grep_ignore_args=(
87-
# file extensions
88-
-e '\.md$'
89-
-e '\.rst$'
90-
# top-level folders
91-
-e '^Doc/'
92-
-e '^Misc/'
93-
# configuration files
94-
-e '^\.github/CODEOWNERS$'
95-
-e '^\.pre-commit-config\.yaml$'
96-
-e '\.ruff\.toml$'
97-
-e 'mypy\.ini$'
98-
)
99-
git diff --name-only "origin/$GITHUB_BASE_REF.." \
100-
| grep -qvE "${grep_ignore_args[@]}" \
101-
&& echo "run-tests=true" >> "$GITHUB_OUTPUT" || true
102-
fi
82+
set -eux
83+
84+
# Fetch enough history to find a common ancestor commit (aka merge-base):
85+
git fetch origin "${refspec_pr}" --depth=$(( commits + 1 )) \
86+
--no-tags --prune --no-recurse-submodules
87+
88+
# This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from):
89+
COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 "${branch_pr}" )
90+
DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" )
10391
104-
# Check if we should run hypothesis tests
105-
GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
106-
echo "$GIT_BRANCH"
107-
if $(echo "$GIT_BRANCH" | grep -q -w '3\.\(8\|9\|10\|11\)'); then
108-
echo "Branch too old for hypothesis tests"
109-
echo "run-hypothesis=false" >> "$GITHUB_OUTPUT"
110-
else
111-
echo "Run hypothesis tests"
112-
echo "run-hypothesis=true" >> "$GITHUB_OUTPUT"
113-
fi
92+
# Get all commits since that commit date from the base branch (eg: master or main):
93+
git fetch origin "${refspec_base}" --shallow-since="${DATE}" \
94+
--no-tags --prune --no-recurse-submodules
95+
env:
96+
branch_pr: 'origin/${{ github.event.pull_request.head.ref }}'
97+
commits: ${{ github.event.pull_request.commits }}
98+
refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}'
99+
refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}'
100+
101+
# We only want to run tests on PRs when related files are changed,
102+
# or when someone triggers a manual workflow run.
103+
- name: Compute changed files
104+
id: changes
105+
run: python Tools/build/compute-changes.py
114106

115-
# oss-fuzz maintains a configuration for fuzzing the main branch of
116-
# CPython, so CIFuzz should be run only for code that is likely to be
117-
# merged into the main branch; compatibility with older branches may
118-
# be broken.
119-
FUZZ_RELEVANT_FILES='(\.c$|\.h$|\.cpp$|^configure$|^\.github/workflows/build\.yml$|^Modules/_xxtestfuzz)'
120-
if [ "$GITHUB_BASE_REF" = "main" ] && [ "$(git diff --name-only "origin/$GITHUB_BASE_REF.." | grep -qE $FUZZ_RELEVANT_FILES; echo $?)" -eq 0 ]; then
121-
# The tests are pretty slow so they are executed only for PRs
122-
# changing relevant files.
123-
echo "Run CIFuzz tests"
124-
echo "run-cifuzz=true" >> "$GITHUB_OUTPUT"
125-
else
126-
echo "Branch too old for CIFuzz tests; or no C files were changed"
127-
echo "run-cifuzz=false" >> "$GITHUB_OUTPUT"
128-
fi
129107
- name: Compute hash for config cache key
130108
id: config-hash
131109
run: |
132110
echo "hash=${{ hashFiles('configure', 'configure.ac', '.github/workflows/build.yml') }}" >> "$GITHUB_OUTPUT"
133-
- name: Get a list of the changed documentation-related files
134-
if: github.event_name == 'pull_request'
135-
id: changed-docs-files
136-
uses: Ana06/get-changed-files@v2.3.0
137-
with:
138-
filter: |
139-
Doc/**
140-
Misc/**
141-
.github/workflows/reusable-docs.yml
142-
format: csv # works for paths with spaces
143-
- name: Check for docs changes
144-
# We only want to run this on PRs when related files are changed,
145-
# or when user triggers manual workflow run.
146-
if: >-
147-
(
148-
github.event_name == 'pull_request'
149-
&& steps.changed-docs-files.outputs.added_modified_renamed != ''
150-
) || github.event_name == 'workflow_dispatch'
151-
id: docs-changes
152-
run: |
153-
echo "run-docs=true" >> "${GITHUB_OUTPUT}"
154-
- name: Get a list of the MSI installer-related files
155-
if: github.event_name == 'pull_request'
156-
id: changed-win-msi-files
157-
uses: Ana06/get-changed-files@v2.3.0
158-
with:
159-
filter: |
160-
Tools/msi/**
161-
.github/workflows/reusable-windows-msi.yml
162-
format: csv # works for paths with spaces
163-
- name: Check for changes in MSI installer-related files
164-
# We only want to run this on PRs when related files are changed,
165-
# or when user triggers manual workflow run.
166-
if: >-
167-
(
168-
github.event_name == 'pull_request'
169-
&& steps.changed-win-msi-files.outputs.added_modified_renamed != ''
170-
) || github.event_name == 'workflow_dispatch'
171-
id: win-msi-changes
172-
run: |
173-
echo "run-win-msi=true" >> "${GITHUB_OUTPUT}"

Tools/build/compute-changes.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""Determine which GitHub Actions workflows to run.
2+
3+
Called by ``.github/workflows/reusable-change-detection.yml``.
4+
We only want to run tests on PRs when related files are changed,
5+
or when someone triggers a manual workflow run.
6+
This improves developer experience by not doing (slow)
7+
unnecessary work in GHA, and saves CI resources.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import os
13+
import subprocess
14+
from dataclasses import dataclass
15+
from pathlib import Path
16+
17+
TYPE_CHECKING = False
18+
if TYPE_CHECKING:
19+
from collections.abc import Set
20+
21+
# Repository-relative paths that get special treatment when classifying
# changed files.
GITHUB_CODEOWNERS_PATH = Path(".github", "CODEOWNERS")
GITHUB_WORKFLOWS_PATH = Path(".github") / "workflows"

# Tooling configuration files, matched on file name alone; a change to one
# of these does not by itself require a test run.
CONFIGURATION_FILE_NAMES = frozenset(
    (
        ".pre-commit-config.yaml",
        ".ruff.toml",
        "mypy.ini",
    )
)

# File suffixes treated as documentation and as C/C++ sources respectively.
SUFFIXES_DOCUMENTATION = frozenset((".rst", ".md"))
SUFFIXES_C_OR_CPP = frozenset((".c", ".h", ".cpp"))
30+
31+
32+
@dataclass(kw_only=True, slots=True)
33+
class Outputs:
34+
run_ci_fuzz: bool = False
35+
run_docs: bool = False
36+
run_hypothesis: bool = False
37+
run_tests: bool = False
38+
run_windows_msi: bool = False
39+
40+
41+
def compute_changes():
    """Decide which CI jobs to run, log the decision and publish it.

    When both a target and a head branch are known, the decision is based on
    the files changed between them; otherwise the tests are simply enabled.
    Branch-specific rules are applied afterwards, and the result is printed
    and handed to ``write_github_output``.
    """
    base, head = git_branches()
    if base and head:
        # Getting changed files only makes sense on a pull request
        changed = get_changed_files(f"origin/{base}", f"origin/{head}")
        outputs = process_changed_files(changed)
    else:
        # Otherwise, just run the tests
        outputs = Outputs(run_tests=True)
    outputs = process_target_branch(outputs, base)

    # (flag, message when set, message when unset); ``None`` means silent.
    announcements = (
        (outputs.run_tests, "Run tests", None),
        (outputs.run_hypothesis, "Run hypothesis tests", None),
        (
            outputs.run_ci_fuzz,
            "Run CIFuzz tests",
            "Branch too old for CIFuzz tests; or no C files were changed",
        ),
        (outputs.run_docs, "Build documentation", None),
        (outputs.run_windows_msi, "Build Windows MSI", None),
    )
    for enabled, when_set, when_unset in announcements:
        message = when_set if enabled else when_unset
        if message is not None:
            print(message)

    print(outputs)

    write_github_output(outputs)
74+
75+
76+
def git_branches() -> tuple[str, str]:
    """Return the ``(target, head)`` branch names for this workflow run.

    The target branch is read from ``GITHUB_BASE_REF``. When that variable
    is unset or empty, ``GITHUB_REF`` is used instead so that branch-specific
    decisions (such as skipping hypothesis tests on old maintenance
    branches) still see the branch name on runs that are not pull requests.

    The head branch is read from ``GITHUB_HEAD_REF`` and is left empty when
    that variable is unset, which lets callers tell pull requests apart from
    other runs.

    A leading ``refs/heads/`` is stripped from both names, and both are
    printed for the job log.
    """
    # ``or`` (rather than a ``get`` default) also covers a variable that is
    # defined but empty.
    target_branch = (
        os.environ.get("GITHUB_BASE_REF")
        or os.environ.get("GITHUB_REF", "")
    )
    target_branch = target_branch.removeprefix("refs/heads/")
    print(f"target branch: {target_branch!r}")

    head_branch = os.environ.get("GITHUB_HEAD_REF", "")
    head_branch = head_branch.removeprefix("refs/heads/")
    print(f"head branch: {head_branch!r}")
    return target_branch, head_branch
85+
86+
87+
def get_changed_files(ref_a: str = "main", ref_b: str = "HEAD") -> Set[Path]:
    """Return the set of paths that differ between two Git refs.

    Runs ``git diff --name-only -z ref_a...ref_b`` in the current working
    directory and echoes the command for the job log. No filtering by
    change type is applied: every path reported by Git is returned.

    Raises ``subprocess.CalledProcessError`` if ``git`` exits with a
    non-zero status.
    """
    # ``-z`` makes Git emit NUL-terminated, unquoted paths.  Without it,
    # names containing quotes, backslashes or non-ASCII characters are
    # C-quoted, which would break the parts/suffix checks done on the result.
    args = ("git", "diff", "--name-only", "-z", f"{ref_a}...{ref_b}", "--")
    print(*args)
    changed_files_result = subprocess.run(
        args, stdout=subprocess.PIPE, check=True, encoding="utf-8"
    )
    # Paths are taken verbatim (no whitespace stripping); only the empty
    # chunk after the final NUL terminator is discarded.
    return frozenset(
        Path(name)
        for name in changed_files_result.stdout.split("\0")
        if name
    )
96+
97+
98+
def process_changed_files(changed_files: Set[Path]) -> Outputs:
    """Work out which jobs the given changed files call for.

    Each path is checked against every rule below, and a flag stays set once
    any path has triggered it:

    * tests: ``.github/workflows/build.yml``, or any file that is not
      documentation, not the CODEOWNERS file and not a known configuration
      file;
    * CIFuzz: ``.github/workflows/build.yml``, C/C++ sources and headers,
      the top-level ``configure`` file, or anything under
      ``Modules/_xxtestfuzz``;
    * docs: anything under ``Doc`` or ``Misc``, any ``.rst``/``.md`` file,
      or ``.github/workflows/reusable-docs.yml``;
    * Windows MSI: anything under ``Tools/msi`` or
      ``.github/workflows/reusable-windows-msi.yml``.

    ``run_hypothesis`` is not decided here and keeps its default.
    """
    wants_tests = wants_fuzz = wants_docs = wants_msi = False
    fuzz_locations = {("configure",), ("Modules", "_xxtestfuzz")}

    for path in changed_files:
        parts = path.parts
        name = path.name
        suffix = path.suffix

        # Documentation files
        is_documentation = (
            parts[0] in {"Doc", "Misc"} or suffix in SUFFIXES_DOCUMENTATION
        )

        # Workflow definitions force the job they describe.
        if path.parent == GITHUB_WORKFLOWS_PATH:
            if name == "build.yml":
                wants_tests = wants_fuzz = True
            elif name == "reusable-docs.yml":
                wants_docs = True
            elif name == "reusable-windows-msi.yml":
                wants_msi = True

        can_skip_tests = (
            is_documentation
            or path == GITHUB_CODEOWNERS_PATH
            or name in CONFIGURATION_FILE_NAMES
        )
        if not can_skip_tests:
            wants_tests = True

        # The fuzz tests are pretty slow so they are executed only for PRs
        # changing relevant files.
        if suffix in SUFFIXES_C_OR_CPP or parts[:2] in fuzz_locations:
            wants_fuzz = True

        # Check for changed documentation-related files
        if is_documentation:
            wants_docs = True

        # Check for changed MSI installer-related files
        if parts[:2] == ("Tools", "msi"):
            wants_msi = True

    return Outputs(
        run_ci_fuzz=wants_fuzz,
        run_docs=wants_docs,
        run_tests=wants_tests,
        run_windows_msi=wants_msi,
    )
152+
153+
154+
def process_target_branch(outputs: Outputs, git_branch: str) -> Outputs:
    """Adjust *outputs* in place for the target branch and triggering event.

    * An empty *git_branch* forces the tests on.
    * Hypothesis tests follow ``run_tests``, except on branches 3.8-3.11
      where they are always disabled.
    * CIFuzz is disabled for every branch other than ``main``.
    * A ``workflow_dispatch`` event (read from ``GITHUB_EVENT_NAME``) forces
      the docs and Windows MSI builds on.

    The same, mutated, *outputs* object is returned.
    """
    if not git_branch:
        outputs.run_tests = True

    # Check if we should run the hypothesis tests
    too_old_for_hypothesis = git_branch in {"3.8", "3.9", "3.10", "3.11"}
    if too_old_for_hypothesis:
        print("Branch too old for hypothesis tests")
    outputs.run_hypothesis = (
        False if too_old_for_hypothesis else outputs.run_tests
    )

    # oss-fuzz maintains a configuration for fuzzing the main branch of
    # CPython, so CIFuzz should be run only for code that is likely to be
    # merged into the main branch; compatibility with older branches may
    # be broken.
    if git_branch != "main":
        outputs.run_ci_fuzz = False

    event_name = os.environ.get("GITHUB_EVENT_NAME", "")
    if event_name.lower() == "workflow_dispatch":
        outputs.run_docs = True
        outputs.run_windows_msi = True

    return outputs
177+
178+
179+
def write_github_output(outputs: Outputs) -> None:
    """Append the job flags to the file named by ``GITHUB_OUTPUT``.

    One ``name=true`` / ``name=false`` line is written per flag. If the
    ``GITHUB_OUTPUT`` environment variable is not defined, a notice is
    printed and nothing is written.
    """
    # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables
    # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-output-parameter
    output_path = os.environ.get("GITHUB_OUTPUT")
    if output_path is None:
        print("GITHUB_OUTPUT not defined!")
        return

    with open(output_path, "a", encoding="utf-8") as f:
        # Output names are the ones the workflow reads back from this step.
        for key, flag in (
            ("run-cifuzz", outputs.run_ci_fuzz),
            ("run-docs", outputs.run_docs),
            ("run-hypothesis", outputs.run_hypothesis),
            ("run-tests", outputs.run_tests),
            ("run-win-msi", outputs.run_windows_msi),
        ):
            f.write(f"{key}={bool_lower(flag)}\n")
192+
193+
194+
def bool_lower(value: bool, /) -> str:
    """Return ``"true"`` for a truthy *value* and ``"false"`` otherwise."""
    if value:
        return "true"
    return "false"
196+
197+
198+
# Script entry point: only compute changes when executed directly.
if __name__ == "__main__":
    compute_changes()

0 commit comments

Comments
 (0)