Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 43 additions & 9 deletions dev/breeze/src/airflow_breeze/utils/selective_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,15 +214,27 @@ def __hash__(self):
r"^providers/common/messaging/.*",
],
FileGroupForCi.PYTHON_PRODUCTION_FILES: [
r"^airflow-core/src/airflow/.*\.py",
r"^providers/.*\.py",
r"^pyproject.toml",
r"^hatch_build.py",
# Production Python source the runtime ships — excludes tests, docs,
# dev tooling, and generated files within those trees. Used by
# `run_python_scans` (SAST/SCA target) and the line-threshold check
# in `_is_large_enough_pr` to decide whether a PR's diff is large
# enough to force the full test matrix.
r"^airflow-core/src/airflow/(?!.*/(?:openapi-gen|i18n/locales)/).*\.py$",
r"^task-sdk/src/airflow/(?!.*_generated\.py$).*\.py$",
r"^airflow-ctl/src/airflowctl/(?!.*generated\.py$).*\.py$",
r"^providers/(?:[^/]+/)+src/.*\.py$",
r"^shared/[^/]+/src/.*\.py$",
r"^pyproject\.toml$",
r"^hatch_build\.py$",
],
FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES: [
r"^airflow-core/src/airflow/.*\.[jt]sx?",
r"^airflow-core/src/airflow/.*\.lock",
r"^airflow-core/src/airflow/ui/.*\.yaml$",
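# Exclude the openapi-gen tree and translation bundles — those are
# generated / data files that live under the same prefixes but
# carry no behavioral risk and would otherwise distort the
# production-code line-count gate.
# NOTE(review): in the `ui/.*\.yaml` pattern below, the lookahead is placed
# after `ui/` and requires a `/` before the directory name, so YAML files
# directly under `ui/openapi-gen/` are still matched — confirm this is intended.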
r"^airflow-core/src/airflow/(?!.*/(?:openapi-gen|i18n/locales)/).*\.[jt]sx?$",
r"^airflow-core/src/airflow/.*\.lock$",
r"^airflow-core/src/airflow/ui/(?!.*/(?:openapi-gen|i18n/locales)/).*\.yaml$",
r"^airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/.*\.yaml$",
],
FileGroupForCi.API_FILES: [
Expand Down Expand Up @@ -708,6 +720,12 @@ def _is_large_enough_pr(self) -> bool:

The heuristics are based on the number of files changed and the total number of lines
changed, excluding generated files, which can be ignored.

The line-count check (``LINE_THRESHOLD``) only counts lines in production-code
files (the Python and JavaScript production file groups plus the Helm file group) —
tests, docs, newsfragments, generated files, translations, dev tooling, and similar
low-risk paths do not contribute to the line count. A 1000-line test or docs PR does
not carry the same risk as a 1000-line change to scheduler code, and only the latter
should trigger the full test matrix.
"""
FILE_THRESHOLD = 25
LINE_THRESHOLD = 500
Expand Down Expand Up @@ -738,9 +756,24 @@ def _is_large_enough_pr(self) -> bool:
console_print("[warning]Cannot determine if PR is big enough, skipping the check[/]")
return False

# The line-count gate only counts churn in production code. We compose
# the existing `*_PRODUCTION_FILES` and helm groups rather than rolling
# a bespoke pattern set, so the definition of "production code" stays
# in lockstep with the rest of CI (e.g. SAST scans targeted by
# `run_python_scans` / `run_javascript_scans`).
production_files = list(
dict.fromkeys(
self._matching_files(FileGroupForCi.PYTHON_PRODUCTION_FILES, CI_FILE_GROUP_MATCHES)
+ self._matching_files(FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES, CI_FILE_GROUP_MATCHES)
+ self._matching_files(FileGroupForCi.HELM_FILES, CI_FILE_GROUP_MATCHES)
)
)
if not production_files:
return False

try:
result = run_command(
["git", "diff", "--numstat", f"{self._commit_ref}^...{self._commit_ref}"] + relevant_files,
["git", "diff", "--numstat", f"{self._commit_ref}^...{self._commit_ref}"] + production_files,
capture_output=True,
text=True,
cwd=AIRFLOW_ROOT_PATH,
Expand All @@ -762,7 +795,8 @@ def _is_large_enough_pr(self) -> bool:
if total_lines >= LINE_THRESHOLD:
console_print(
f"[warning]Running full set of tests because PR changes {total_lines} lines "
f"in {files_changed} files[/]"
f"of production code in {len(production_files)} file(s) "
f"(of {files_changed} relevant file(s))[/]"
)
return True
except Exception:
Expand Down
60 changes: 60 additions & 0 deletions dev/breeze/tests/test_selective_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3487,6 +3487,66 @@ def test_large_pr_by_file_count(files, expected_outputs: dict[str, str]):
},
id="Single large file with 1000 lines",
),
pytest.param(
tuple(f"airflow-core/tests/unit/models/test_file{i}.py" for i in range(10)),
"\n".join([f"100\t100\tairflow-core/tests/unit/models/test_file{i}.py" for i in range(10)]),
{
"full-tests-needed": "false",
},
id="Large test-only PR (2000 lines) does not trigger full tests",
),
pytest.param(
("docs/index.rst", "airflow-core/docs/security/security_model.rst"),
"600\t600\tdocs/index.rst\n400\t400\tairflow-core/docs/security/security_model.rst",
{
"full-tests-needed": "false",
},
id="Large docs-only PR does not trigger full tests",
),
pytest.param(
(
"airflow-core/src/airflow/ui/openapi-gen/queries/queries.ts",
"airflow-ctl/src/airflowctl/api/datamodels/generated.py",
"task-sdk/src/airflow/sdk/api/datamodels/_generated.py",
),
"\n".join(
[
"400\t400\tairflow-core/src/airflow/ui/openapi-gen/queries/queries.ts",
"400\t400\tairflow-ctl/src/airflowctl/api/datamodels/generated.py",
"400\t400\ttask-sdk/src/airflow/sdk/api/datamodels/_generated.py",
]
),
{
"full-tests-needed": "false",
},
id="Generated-only large PR does not trigger full tests",
),
# In mixed PRs the production-file filter narrows the `git diff --numstat`
# call to the production paths, so the mocked stdout below only contains
# the production-file rows (mirroring what real git would return for
# that filtered argument list).
pytest.param(
tuple(
[f"airflow-core/src/airflow/models/file{i}.py" for i in range(5)]
+ [f"airflow-core/tests/unit/models/test_file{i}.py" for i in range(5)]
),
"\n".join([f"60\t60\tairflow-core/src/airflow/models/file{i}.py" for i in range(5)]),
{
"full-tests-needed": "true",
},
id="Mixed PR with 600 production lines triggers (test lines excluded but prod >= 500)",
),
pytest.param(
tuple(
[f"airflow-core/src/airflow/models/file{i}.py" for i in range(5)]
+ [f"airflow-core/tests/unit/models/test_file{i}.py" for i in range(5)]
),
"\n".join([f"20\t20\tairflow-core/src/airflow/models/file{i}.py" for i in range(5)]),
{
"full-tests-needed": "false",
},
id="Mixed PR with only 200 production lines does not trigger (test lines excluded)",
),
],
)
def test_large_pr_by_line_count(files, git_diff_output, expected_outputs: dict[str, str]):
Expand Down
Loading