diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 5e2a91699908e..693e15feb5910 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -214,15 +214,27 @@ def __hash__(self): r"^providers/common/messaging/.*", ], FileGroupForCi.PYTHON_PRODUCTION_FILES: [ - r"^airflow-core/src/airflow/.*\.py", - r"^providers/.*\.py", - r"^pyproject.toml", - r"^hatch_build.py", + # Production Python source the runtime ships — excludes tests, docs, + # dev tooling, and generated files within those trees. Used by + # `run_python_scans` (SAST/SCA target) and the line-threshold check + # in `_is_large_enough_pr` to decide whether a PR's diff is large + # enough to force the full test matrix. + r"^airflow-core/src/airflow/(?!.*/(?:openapi-gen|i18n/locales)/).*\.py$", + r"^task-sdk/src/airflow/(?!.*_generated\.py$).*\.py$", + r"^airflow-ctl/src/airflowctl/(?!.*generated\.py$).*\.py$", + r"^providers/(?:[^/]+/)+src/.*\.py$", + r"^shared/[^/]+/src/.*\.py$", + r"^pyproject\.toml$", + r"^hatch_build\.py$", ], FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES: [ - r"^airflow-core/src/airflow/.*\.[jt]sx?", - r"^airflow-core/src/airflow/.*\.lock", - r"^airflow-core/src/airflow/ui/.*\.yaml$", + # Exclude the openapi-gen tree and translation bundles — those are + # generated / data files that ride under the same prefixes but + # carry no behavioral risk and would otherwise distort the + # production-code line-count gate. 
+ r"^airflow-core/src/airflow/(?!.*/(?:openapi-gen|i18n/locales)/).*\.[jt]sx?$", r"^airflow-core/src/airflow/.*\.lock$", r"^airflow-core/src/airflow/ui/(?!(?:.*/)?(?:openapi-gen|i18n/locales)/).*\.yaml$", r"^airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/.*\.yaml$", ], FileGroupForCi.API_FILES: [ @@ -708,6 +720,12 @@ def _is_large_enough_pr(self) -> bool: The heuristics are based on number of files changed and total lines changed, while excluding generated files which can be ignored. + + The line-count check (``LINE_THRESHOLD``) only counts lines in production-code + files — tests, docs, newsfragments, generated files, translations, dev tooling, + and similar low-risk paths do not contribute to the line count. A 1000-line test + or docs PR is not the same shape of risk as a 1000-line change to scheduler + code, and only the latter should trigger the full test matrix. """ FILE_THRESHOLD = 25 LINE_THRESHOLD = 500 @@ -738,9 +756,24 @@ def _is_large_enough_pr(self) -> bool: console_print("[warning]Cannot determine if PR is big enough, skipping the check[/]") return False + # The line-count gate only counts churn in production code. We compose + # the existing `*_PRODUCTION_FILES` and helm groups rather than rolling + # a bespoke pattern set, so the definition of "production code" stays + # in lockstep with the rest of CI (e.g. SAST scans targeted by + # `run_python_scans` / `run_javascript_scans`). 
+ production_files = list( + dict.fromkeys( + self._matching_files(FileGroupForCi.PYTHON_PRODUCTION_FILES, CI_FILE_GROUP_MATCHES) + + self._matching_files(FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES, CI_FILE_GROUP_MATCHES) + + self._matching_files(FileGroupForCi.HELM_FILES, CI_FILE_GROUP_MATCHES) + ) + ) + if not production_files: + return False + try: result = run_command( - ["git", "diff", "--numstat", f"{self._commit_ref}^...{self._commit_ref}"] + relevant_files, + ["git", "diff", "--numstat", f"{self._commit_ref}^...{self._commit_ref}"] + production_files, capture_output=True, text=True, cwd=AIRFLOW_ROOT_PATH, @@ -762,7 +795,8 @@ def _is_large_enough_pr(self) -> bool: if total_lines >= LINE_THRESHOLD: console_print( f"[warning]Running full set of tests because PR changes {total_lines} lines " - f"in {files_changed} files[/]" + f"of production code in {len(production_files)} file(s) " + f"(of {files_changed} relevant file(s))[/]" ) return True except Exception: diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 69fcf6fd054c8..3ab0298735891 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -3487,6 +3487,66 @@ def test_large_pr_by_file_count(files, expected_outputs: dict[str, str]): }, id="Single large file with 1000 lines", ), + pytest.param( + tuple(f"airflow-core/tests/unit/models/test_file{i}.py" for i in range(10)), + "\n".join([f"100\t100\tairflow-core/tests/unit/models/test_file{i}.py" for i in range(10)]), + { + "full-tests-needed": "false", + }, + id="Large test-only PR (2000 lines) does not trigger full tests", + ), + pytest.param( + ("docs/index.rst", "airflow-core/docs/security/security_model.rst"), + "600\t600\tdocs/index.rst\n400\t400\tairflow-core/docs/security/security_model.rst", + { + "full-tests-needed": "false", + }, + id="Large docs-only PR does not trigger full tests", + ), + pytest.param( + ( + 
"airflow-core/src/airflow/ui/openapi-gen/queries/queries.ts", + "airflow-ctl/src/airflowctl/api/datamodels/generated.py", + "task-sdk/src/airflow/sdk/api/datamodels/_generated.py", + ), + "\n".join( + [ + "400\t400\tairflow-core/src/airflow/ui/openapi-gen/queries/queries.ts", + "400\t400\tairflow-ctl/src/airflowctl/api/datamodels/generated.py", + "400\t400\ttask-sdk/src/airflow/sdk/api/datamodels/_generated.py", + ] + ), + { + "full-tests-needed": "false", + }, + id="Generated-only large PR does not trigger full tests", + ), + # In mixed PRs the production-file filter narrows the `git diff --numstat` + # call to the production paths, so the mocked stdout below only contains + # the production-file rows (mirroring what real git would return for + # that filtered argument list). + pytest.param( + tuple( + [f"airflow-core/src/airflow/models/file{i}.py" for i in range(5)] + + [f"airflow-core/tests/unit/models/test_file{i}.py" for i in range(5)] + ), + "\n".join([f"60\t60\tairflow-core/src/airflow/models/file{i}.py" for i in range(5)]), + { + "full-tests-needed": "true", + }, + id="Mixed PR with 600 production lines triggers (test lines excluded but prod >= 500)", + ), + pytest.param( + tuple( + [f"airflow-core/src/airflow/models/file{i}.py" for i in range(5)] + + [f"airflow-core/tests/unit/models/test_file{i}.py" for i in range(5)] + ), + "\n".join([f"20\t20\tairflow-core/src/airflow/models/file{i}.py" for i in range(5)]), + { + "full-tests-needed": "false", + }, + id="Mixed PR with only 200 production lines does not trigger (test lines excluded)", + ), ], ) def test_large_pr_by_line_count(files, git_diff_output, expected_outputs: dict[str, str]):