feat(all|pr_compliance): add PR validator, tests, CI gating

allnes · allnes · commit 43db7591406a · 2025-09-11T15:21:25.000+02:00
[What]
Add PR/commit compliance validator script, unit tests with 100% coverage, and a PR Compliance workflow split into separate jobs. Gate other PR workflows to run only after PR Compliance succeeds. Update PR templates with 12 required sections and clear RU/EN title/commit guidelines. Fix pre-commit issues and align flake8 config.

[Why]
- Enforce consistent PR titles, bodies, and commit messages.
- Provide clear, actionable feedback.
- Prevent non-compliant PRs from triggering heavy CI.
- Keep validation logic tested and maintainable.

[How]
- .github/scripts/validate_pr.py: strict title/body/commit checks; flexible group in title; clear EN messages with RU/EN examples; GitHub API for commits; robust output + exit codes; formatting/lint fixes.
- .github/workflows/pr-compliance.yml: split into Unit Tests → Title → Body → Commits; unit tests gate others; coverage=100%.
- Gate PR workflows (pre-commit, static analysis, docker) via workflow_run on successful PR Compliance.
- PR templates: 12-section skeleton, no HTML comments, visible title/body/commit guidance.
- Linting: pre-commit passes; flake8 E203 ignored to match formatter; long lines wrapped.

Scope:
- Task: 0
- Variant: 0
- Technology: all
- Folder: pr_compliance

Tests:
- Local: python -m unittest -v tests/test_validate_pr.py tests/test_validate_pr_main.py
- Coverage: coverage run -m unittest -v tests/test_validate_pr.py tests/test_validate_pr_main.py && coverage report -m .github/scripts/validate_pr.py (100%)
- Pre-commit: pre-commit run --all-files (all hooks pass)

Local runs:
- Validator: GITHUB_TOKEN=<token> python .github/scripts/validate_pr.py --repo <owner>/<repo> --pr <number> --checks all --verbose
diff --git a/.github/scripts/validate_pr.py b/.github/scripts/validate_pr.py
@@ -14,14 +14,13 @@
 import os
 import re
 import sys
-import textwrap
 from typing import List, Dict, Tuple, Optional
 from urllib.request import Request, urlopen
 from urllib.parse import quote
 
 
 # --- Title validation regex (strict) ---
-TITLE_REGEX = r'''
+TITLE_REGEX = r"""
 ^(?:\[TASK\]\s*)?
 (?P<task>\d+)-(?P<variant>\d+)\.\s+
 (?P<lastname>[А-ЯA-ZЁ][а-яa-zё]+)\s+
@@ -30,11 +29,11 @@
 (?P<group>.+?)\.\s+
 (?P<taskname>\S.*)
 $
-'''
+"""
 
 
-SUBJECT_REGEX = r'^(feat|fix|perf|test|refactor|docs|build|chore)\(([a-z]+)\|([a-z0-9_]+)\): [A-Za-z0-9].*$'
-ALLOWED_TECH = {'seq', 'omp', 'mpi', 'stl', 'tbb', 'all'}
+SUBJECT_REGEX = r"^(feat|fix|perf|test|refactor|docs|build|chore)\(([a-z]+)\|([a-z0-9_]+)\): [A-Za-z0-9].*$"
+ALLOWED_TECH = {"seq", "omp", "mpi", "stl", "tbb", "all"}
 
 
 def print_section(title: str) -> None:
@@ -75,7 +74,8 @@ def validate_title(title: str) -> List[str]:
     # 1) Optional prefix is allowed; strip it for partial checks
     work = title
     if work.startswith("[TASK]"):
-        work = work[len("[TASK]") :].lstrip()
+        task_prefix_len = len("[TASK]")
+        work = work[task_prefix_len:].lstrip()
 
     # 2) Task/variant with dot
     m = re.match(r"^(\d+)-(\d+)\.\s+", work)
@@ -87,7 +87,8 @@ def validate_title(title: str) -> List[str]:
         )
         return errors
 
-    rest = work[m.end() :]
+    pos = m.end()
+    rest = work[pos:]
 
     # 3) Full name with dot after patronymic
     m = re.match(
@@ -103,7 +104,8 @@ def validate_title(title: str) -> List[str]:
         )
         return errors
 
-    rest = rest[m.end() :]
+    pos = m.end()
+    rest = rest[pos:]
 
     # 4) Group with dot
     m = re.match(r"^(.+?)\.\s+", rest, flags=re.UNICODE)
@@ -115,7 +117,8 @@ def validate_title(title: str) -> List[str]:
         )
         return errors
 
-    rest = rest[m.end() :]
+    pos = m.end()
+    rest = rest[pos:]
 
     # 5) Task name validity is enforced by the full regex (non-whitespace start)
 
@@ -144,7 +147,10 @@ def _split_sections_by_headers(body: str) -> Dict[str, Tuple[int, int]]:
     for i, m in enumerate(matches):
         start = m.start()
         end = matches[i + 1].start() if i + 1 < len(matches) else len(body)
-        header_line = body[m.start() : body.find("\n", m.start()) if "\n" in body[m.start() :] else end]
+        next_newline = body.find("\n", m.start())
+        if next_newline == -1:
+            next_newline = end
+        header_line = body[m.start() : next_newline]
         sections[header_line.strip()] = (start, end)
     return sections
 
@@ -185,7 +191,9 @@ def validate_body(body: str) -> List[str]:
         return errors
 
     if "<!--" in body:
-        errors.append("Found HTML comments '<!-- ... -->'. Remove all guidance comments.")
+        errors.append(
+            "Found HTML comments '<!-- ... -->'. Remove all guidance comments."
+        )
 
     sections_map = _split_sections_by_headers(body)
 
@@ -209,8 +217,8 @@ def validate_body(body: str) -> List[str]:
 
     if empty_labels:
         errors.append("Empty required fields (add text after the colon):")
-        for l in empty_labels:
-            errors.append(f"✗ {l}")
+        for label_entry in empty_labels:
+            errors.append(f"✗ {label_entry}")
 
     return errors
 
@@ -273,7 +281,14 @@ def validate_commit_message(message: str) -> List[str]:
     body = "\n".join(lines[2:]) if len(lines) >= 2 else ""
 
     # Body tokens at start of line
-    required_tokens = [r"^\[What\]", r"^\[Why\]", r"^\[How\]", r"^Scope:", r"^Tests:", r"^Local runs:"]
+    required_tokens = [
+        r"^\[What\]",
+        r"^\[Why\]",
+        r"^\[How\]",
+        r"^Scope:",
+        r"^Tests:",
+        r"^Local runs:",
+    ]
     for tok in required_tokens:
         if not re.search(tok, body, flags=re.MULTILINE):
             errors.append(f"Missing required body section: '{tok.strip('^')}'.")
@@ -286,7 +301,9 @@ def validate_commit_message(message: str) -> List[str]:
     else:
         required_scope = ["Task", "Variant", "Technology", "Folder"]
         for key in required_scope:
-            if not re.search(rf"^\s*[-*]?\s*{key}\s*:\s*.+$", scope_block, flags=re.MULTILINE):
+            if not re.search(
+                rf"^\s*[-*]?\s*{key}\s*:\s*.+$", scope_block, flags=re.MULTILINE
+            ):
                 errors.append(f"In 'Scope:' section missing or empty field '{key}:'.")
 
     return errors
@@ -304,9 +321,16 @@ def _load_event_payload(path: Optional[str]) -> Optional[dict]:
 
 def main() -> int:
     parser = argparse.ArgumentParser(description="PR/commit compliance validator")
-    parser.add_argument("--repo", type=str, default=os.environ.get("GITHUB_REPOSITORY"), help="owner/repo")
+    parser.add_argument(
+        "--repo",
+        type=str,
+        default=os.environ.get("GITHUB_REPOSITORY"),
+        help="owner/repo",
+    )
     parser.add_argument("--pr", type=int, default=None, help="PR number")
-    parser.add_argument("--checks", type=str, choices=["title", "body", "commits", "all"], default="all")
+    parser.add_argument(
+        "--checks", type=str, choices=["title", "body", "commits", "all"], default="all"
+    )
     parser.add_argument("--fail-on-warn", action="store_true")
     parser.add_argument("--verbose", action="store_true")
 
@@ -323,7 +347,9 @@ def main() -> int:
 
     if payload and not pr_number:
         pr_number = payload.get("number") or (
-            payload.get("pull_request", {}).get("number") if payload.get("pull_request") else None
+            payload.get("pull_request", {}).get("number")
+            if payload.get("pull_request")
+            else None
         )
 
     # Collect title/body from payload when available
@@ -346,7 +372,9 @@ def main() -> int:
         if args.checks in ("title", "all"):
             print_section("PR TITLE")
             if pr_title is None:
-                print("Could not get PR title from event payload. Ensure a pull_request context or supply it manually.")
+                print(
+                    "Could not get PR title from event payload. Ensure a pull_request context or supply it manually."
+                )
                 total_errors.append("No title data")
             else:
                 errs = validate_title(pr_title)
@@ -363,7 +391,9 @@ def main() -> int:
         if args.checks in ("body", "all"):
             print_section("PR BODY")
             if pr_body is None:
-                print("Could not get PR body from event payload. Ensure a pull_request context or supply it manually.")
+                print(
+                    "Could not get PR body from event payload. Ensure a pull_request context or supply it manually."
+                )
                 total_errors.append("No body data")
             else:
                 errs = validate_body(pr_body)
@@ -380,7 +410,9 @@ def main() -> int:
         if args.checks in ("commits", "all"):
             print_section("COMMITS")
             if not (owner and repo and pr_number):
-                print("Commit validation requires --repo owner/repo and --pr <number> or a GitHub event payload.")
+                print(
+                    "Commit validation requires --repo owner/repo and --pr <number> or a GitHub event payload."
+                )
                 total_errors.append("Insufficient params for commits fetch")
             else:
                 token = os.environ.get("GITHUB_TOKEN")
@@ -444,15 +476,29 @@ def main() -> int:
             assert validate_title(t), f"Expected invalid title: {t}"
 
         # Commit subjects
-        assert re.match(SUBJECT_REGEX, "feat(omp|nesterov_a_vector_sum): implement parallel vector sum")
+        assert re.match(
+            SUBJECT_REGEX,
+            "feat(omp|nesterov_a_vector_sum): implement parallel vector sum",
+        )
         assert not re.match(SUBJECT_REGEX, "feature(omp|x): bad type")
         # Technology validation is performed outside the regex
         errs = validate_commit_message(
-            "feat(cuda|nesterov_a_vector_sum): add cuda impl\n\n[What]\n[Why]\n[How]\nScope:\n- Task: 1\n- Variant: 2\n- Technology: cuda\n- Folder: nesterov_a_vector_sum\nTests:\nLocal runs:\n"
+            (
+                "feat(cuda|nesterov_a_vector_sum): add cuda impl"
+                "\n\n[What]\n[Why]\n[How]\nScope:\n"
+                "- Task: 1\n- Variant: 2\n- Technology: cuda\n- Folder: nesterov_a_vector_sum\n"
+                "Tests:\nLocal runs:\n"
+            )
         )
         assert any("Disallowed technology" in e for e in errs)
         too_long = "feat(omp|nesterov_a_vector_sum): " + "x" * 73
-        errs = validate_commit_message(too_long + "\n\n[What]\n[Why]\n[How]\nScope:\n- Task: 1\n- Variant: 2\n- Technology: omp\n- Folder: nesterov_a_vector_sum\nTests:\nLocal runs:\n")
+        errs = validate_commit_message(
+            (
+                too_long + "\n\n[What]\n[Why]\n[How]\nScope:\n"
+                "- Task: 1\n- Variant: 2\n- Technology: omp\n- Folder: nesterov_a_vector_sum\n"
+                "Tests:\nLocal runs:\n"
+            )
+        )
         assert any("exceeds 72" in e for e in errs)
         print("Self-tests passed")
 
diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,6 @@
 [flake8]
 max-line-length = 120
+extend-ignore = E203
 exclude =
     3rdparty
     venv
diff --git a/tests/test_validate_pr.py b/tests/test_validate_pr.py
@@ -2,9 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import os
-import sys
 import unittest
-import re
 
 
 ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@@ -27,7 +25,9 @@ def setUpClass(cls):
         cls.v = _import_validator()
 
     def test_title_valid_ru_and_en(self):
-        ok_ru = "2-12. Иванов Иван Иванович. 2341-а234. Вычисление суммы элементов вектора."
+        ok_ru = (
+            "2-12. Иванов Иван Иванович. 2341-а234. Вычисление суммы элементов вектора."
+        )
         ok_task_tag_ru = "[TASK] " + ok_ru
         ok_en = "3-7. Smith John Edward. 1234-a1. Fast matrix multiplication."
         for t in (ok_ru, ok_task_tag_ru, ok_en):
@@ -203,28 +203,36 @@ def test_commit_valid(self):
     def test_commit_invalid_cases(self):
         # wrong type
         msg1 = (
-            "feature(omp|nesterov_a_vector_sum): summary\n\n[What]\n[Why]\n[How]\nScope:\n- Task: 1\n- Variant: 1\n- Technology: omp\n- Folder: f\nTests:\nLocal runs:\n"
+            "feature(omp|nesterov_a_vector_sum): summary\n\n[What]\n[Why]\n[How]\n"
+            "Scope:\n- Task: 1\n- Variant: 1\n- Technology: omp\n- Folder: f\n"
+            "Tests:\nLocal runs:\n"
         )
         # disallowed technology
         msg2 = (
-            "feat(cuda|nesterov_a_vector_sum): add cuda impl\n\n[What]\n[Why]\n[How]\nScope:\n- Task: 1\n- Variant: 1\n- Technology: cuda\n- Folder: f\nTests:\nLocal runs:\n"
+            "feat(cuda|nesterov_a_vector_sum): add cuda impl\n\n[What]\n[Why]\n[How]\n"
+            "Scope:\n- Task: 1\n- Variant: 1\n- Technology: cuda\n- Folder: f\n"
+            "Tests:\nLocal runs:\n"
         )
         # subject too long
         long_summary = "x" * 73
         msg3 = (
-            f"feat(omp|nesterov_a_vector_sum): {long_summary}\n\n[What]\n[Why]\n[How]\nScope:\n- Task: 1\n- Variant: 1\n- Technology: omp\n- Folder: f\nTests:\nLocal runs:\n"
+            f"feat(omp|nesterov_a_vector_sum): {long_summary}\n\n[What]\n[Why]\n[How]\n"
+            "Scope:\n- Task: 1\n- Variant: 1\n- Technology: omp\n- Folder: f\n"
+            "Tests:\nLocal runs:\n"
         )
         # no blank line
         msg4 = (
-            "feat(omp|nesterov_a_vector_sum): ok\n[What]\n[Why]\n[How]\nScope:\n- Task: 1\n- Variant: 1\n- Technology: omp\n- Folder: f\nTests:\nLocal runs:\n"
+            "feat(omp|nesterov_a_vector_sum): ok\n[What]\n[Why]\n[How]\n"
+            "Scope:\n- Task: 1\n- Variant: 1\n- Technology: omp\n- Folder: f\n"
+            "Tests:\nLocal runs:\n"
         )
         # missing tokens
-        msg5 = (
-            "feat(omp|nesterov_a_vector_sum): ok\n\nNo sections here\n"
-        )
+        msg5 = "feat(omp|nesterov_a_vector_sum): ok\n\nNo sections here\n"
         # missing fields in scope
         msg6 = (
-            "feat(omp|nesterov_a_vector_sum): ok\n\n[What]\n[Why]\n[How]\nScope:\n- Task: 1\n- Technology: omp\n- Folder: f\n\nTests:\nLocal runs:\n"
+            "feat(omp|nesterov_a_vector_sum): ok\n\n[What]\n[Why]\n[How]\n"
+            "Scope:\n- Task: 1\n- Technology: omp\n- Folder: f\n\n"
+            "Tests:\nLocal runs:\n"
         )
 
         for i, m in enumerate([msg1, msg2, msg3, msg4, msg5, msg6], start=1):
diff --git a/tests/test_validate_pr_main.py b/tests/test_validate_pr_main.py