refactor(142): simplify code per /simplify review (#174)

davidmatousek · claude · web-flow · commit b6af21c186e5 · 2026-04-16T17:26:43.000-04:00
- scripts/tachi_parsers.py: drop duplicate em-dash codepoint from
  parse_finding_pattern tuple; trim PR-narrative docstring to focus
  on behavior
- tests/scripts/test_pattern_synthesis.py: replace 206-line
  hand-rolled YAML parser with yaml.safe_load; add pyyaml to
  requirements-dev.txt as explicit dev dep (already used by
  test_pattern_classification_rules.py)
- tests/scripts/test_pattern_extraction.py,
  test_pattern_classification_rules.py: derive canonical pattern
  list from tachi_parsers.VALID_AGENTIC_PATTERNS instead of
  duplicating enum values
- tests/scripts/test_finding_pattern_parser.py: delete ~25
  one-line docstrings that restate the test name (KB-027
  narrative-comment sprawl); keep docstrings with non-obvious why
- BACKLOG.md: regenerated snapshot

Net: -255 lines. 268/269 tests pass (1 intentional skip).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/docs/product/_backlog/BACKLOG.md b/docs/product/_backlog/BACKLOG.md
@@ -1,6 +1,6 @@
 # Backlog
 
-> Auto-generated from GitHub Issues on 2026-04-16T21:08:50Z.
+> Auto-generated from GitHub Issues on 2026-04-16T21:09:47Z.
 > Source of truth: GitHub Issues with `stage:*` labels.
 > Regenerate: `/aod.status` or `.aod/scripts/bash/backlog-regenerate.sh`
 
@@ -39,7 +39,7 @@
 
 | # | Title | Delivered | Retro | Updated |
 |---|-------|-----------|-------|---------|
-| #142 | MAESTRO Phase 3: Agentic threat pattern expansion (Collusion, Emergent Behavior, Temporal, Trust, Communication, Resource) | 2026-04-16 | — | 2026-04-16 |
+| — | *No items in this stage* | | |
 
 ## Untracked
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -2,3 +2,4 @@
 # Install with: pip install -r requirements-dev.txt
 pytest>=8.0
 pytest-cov>=4.1
+pyyaml>=6.0
diff --git a/scripts/tachi_parsers.py b/scripts/tachi_parsers.py
@@ -91,33 +91,16 @@ def _parse_int(s: str) -> int:
 def parse_finding_pattern(value) -> str:
     """Normalize an ``agentic_pattern`` cell value to a canonical enum string.
 
-    Accepts string, None, or missing (empty) input. Returns one of the eight
-    canonical lowercase values in ``VALID_AGENTIC_PATTERNS``:
-    ``agent_collusion``, ``emergent_behavior``, ``temporal_attack``,
-    ``trust_exploitation``, ``communication_vulnerability``,
-    ``resource_competition``, ``none``, ``multiple``.
-
-    Backward compatibility (per FR-017, Feature 142): null, missing,
-    empty-string, whitespace-only, ``"—"`` (em dash placeholder rendered by
-    FR-009 for findings with ``agentic_pattern: none``), ``"-"`` (ASCII
-    dash), and unrecognized strings all collapse to ``"none"``. This lets
-    pre-Feature-142 threats.md (no Pattern column) parse cleanly with
-    every finding defaulted to ``none``, and preserves determinism across
-    schema version skew (per ADR-021 / ADR-026).
-
-    Case-insensitive on input: ``Agent_Collusion`` and ``AGENT_COLLUSION``
-    both normalize to ``agent_collusion``. Canonical storage is always
-    lowercase.
-
-    See: ADR-026 (Hybrid Post-Hoc Synthesis mechanism for Phase 3.6 pattern
-    classification) and
-    ``.claude/skills/tachi-shared/references/maestro-agentic-patterns-shared.md``
-    for pattern semantics and the classification rule table.
+    Accepts string, None, or missing input and returns one of the values in
+    ``VALID_AGENTIC_PATTERNS``. Null, whitespace, em-dash placeholder, ASCII
+    dash, and unrecognized strings all collapse to ``"none"`` so pre-schema-1.4
+    inputs without a Pattern column parse cleanly. Case-insensitive; canonical
+    storage is always lowercase.
     """
     if value is None:
         return "none"
     normalized = str(value).strip().lower()
-    if not normalized or normalized in ("\u2014", "-", "—"):
+    if not normalized or normalized in ("\u2014", "-"):
         return "none"
     if normalized in VALID_AGENTIC_PATTERNS:
         return normalized
diff --git a/tests/scripts/test_finding_pattern_parser.py b/tests/scripts/test_finding_pattern_parser.py
@@ -108,33 +108,27 @@ class TestValidAgenticPatternsConstant:
     """
 
     def test_constant_is_exported(self):
-        """The module exports VALID_AGENTIC_PATTERNS at the top level."""
         import tachi_parsers
 
         assert hasattr(tachi_parsers, "VALID_AGENTIC_PATTERNS")
 
     def test_constant_has_eight_values(self):
-        """The constant contains exactly 8 enum values per FR-003."""
         assert len(VALID_AGENTIC_PATTERNS) == 8
 
     def test_constant_has_exact_values(self):
-        """The constant contains exactly the 8 canonical values (no drift)."""
         assert set(VALID_AGENTIC_PATTERNS) == set(CANONICAL_PATTERN_VALUES)
 
     def test_constant_values_are_all_lowercase(self):
-        """All enum values are lowercase strings (canonical storage format)."""
         for value in VALID_AGENTIC_PATTERNS:
             assert isinstance(value, str)
             assert value == value.lower(), (
                 f"VALID_AGENTIC_PATTERNS contains non-lowercase value: {value!r}"
             )
 
     def test_constant_has_none_sentinel(self):
-        """The constant contains the `none` sentinel for non-pattern findings."""
         assert "none" in VALID_AGENTIC_PATTERNS
 
     def test_constant_has_multiple_sentinel(self):
-        """The constant contains the `multiple` value for equal-rule matches."""
         assert "multiple" in VALID_AGENTIC_PATTERNS
 
 
@@ -147,7 +141,6 @@ class TestParseFindingPatternCanonicalValues:
 
     @pytest.mark.parametrize("canonical_value", CANONICAL_PATTERN_VALUES)
     def test_canonical_value_returns_itself(self, canonical_value):
-        """Each of the 8 canonical values normalizes to itself (identity)."""
         assert parse_finding_pattern(canonical_value) == canonical_value
 
 
@@ -197,7 +190,6 @@ class TestParseFindingPatternBackwardCompat:
     """
 
     def test_none_input_returns_none_string(self):
-        """Python None input returns the string 'none' (not None)."""
         assert parse_finding_pattern(None) == "none"
 
     def test_empty_string_returns_none(self):
@@ -210,32 +202,25 @@ def test_tabs_and_newlines_return_none(self):
         assert parse_finding_pattern("\t\n  \n") == "none"
 
     def test_em_dash_unicode_escape_returns_none(self):
-        """U+2014 em-dash via unicode escape — canonical FR-009 placeholder."""
         assert parse_finding_pattern("\u2014") == "none"
 
     def test_em_dash_literal_returns_none(self):
-        """U+2014 em-dash as a literal character in source returns 'none'."""
         assert parse_finding_pattern("—") == "none"
 
     def test_ascii_hyphen_returns_none(self):
-        """ASCII hyphen-minus returns 'none' (editor-de-curling tolerance)."""
         assert parse_finding_pattern("-") == "none"
 
     def test_unrecognized_string_returns_none(self):
-        """Unrecognized strings gracefully degrade to 'none' per FR-017."""
         assert parse_finding_pattern("xyz") == "none"
 
     def test_almost_canonical_typo_returns_none(self):
-        """A near-miss typo is not silently corrected — returns 'none'."""
         assert parse_finding_pattern("agent_collusio") == "none"
 
     def test_non_string_integer_returns_none(self):
-        """Non-string input (int) converts + fails validation → 'none'."""
-        # `str(0).strip().lower()` is "0", which is not in VALID_AGENTIC_PATTERNS
+        # str(0).strip().lower() is "0", not a canonical value — hits the fall-through.
         assert parse_finding_pattern(0) == "none"
 
     def test_em_dash_with_surrounding_whitespace_returns_none(self):
-        """Em-dash with surrounding whitespace — strip before comparison."""
         assert parse_finding_pattern("  —  ") == "none"
 
 
@@ -252,11 +237,9 @@ def findings(self):
         return parse_threats_findings(content)
 
     def test_all_findings_parsed(self, findings):
-        """All 10 rows in the fixture table are parsed."""
         assert len(findings) == 10
 
     def test_every_finding_has_pattern_field(self, findings):
-        """Every parsed finding has a populated ``agentic_pattern`` field."""
         for finding in findings:
             assert "agentic_pattern" in finding, (
                 f"finding {finding.get('id')} missing agentic_pattern key"
@@ -265,7 +248,6 @@ def test_every_finding_has_pattern_field(self, findings):
             assert finding["agentic_pattern"] != ""
 
     def test_every_pattern_is_canonical(self, findings):
-        """Every emitted agentic_pattern is one of the 8 canonical values."""
         for finding in findings:
             assert finding["agentic_pattern"] in VALID_AGENTIC_PATTERNS, (
                 f"finding {finding['id']} has non-canonical pattern "
@@ -277,14 +259,12 @@ def test_trust_exploitation_mapped(self, findings):
         assert match["agentic_pattern"] == "trust_exploitation"
 
     def test_agent_collusion_mapped_twice(self, findings):
-        """Two separate rows with agent_collusion both parse correctly."""
         collusion = [f for f in findings if f["agentic_pattern"] == "agent_collusion"]
         assert len(collusion) == 2
         ids = {f["id"] for f in collusion}
         assert ids == {"AG-2", "AGP-01"}
 
     def test_em_dash_row_parses_as_none(self, findings):
-        """The T-3 row with Pattern = `—` parses as `agentic_pattern: 'none'`."""
         em_dash_finding = next(f for f in findings if f["id"] == "T-3")
         assert em_dash_finding["agentic_pattern"] == "none"
 
@@ -309,7 +289,7 @@ def test_multiple_sentinel_mapped(self, findings):
         assert match["agentic_pattern"] == "multiple"
 
     def test_explicit_none_value_mapped(self, findings):
-        """A literal 'none' value in the table parses as 'none' (not '—')."""
+        # A literal 'none' string must parse to 'none' the same as the em-dash placeholder.
         match = next(f for f in findings if f["id"] == "AG-9")
         assert match["agentic_pattern"] == "none"
 
@@ -331,30 +311,24 @@ def findings(self):
         return parse_threats_findings(content)
 
     def test_agentic_pattern_header_detected(self, findings):
-        """The 'Agentic Pattern' spelling is detected (FR-009 canonical)."""
         # If header detection failed, every finding would default to 'none'.
-        # We have 4 non-none rows in this fixture, so at least one must be
-        # non-none for detection to have worked.
+        # The fixture has 4 non-none rows, so any detection failure collapses this count.
         non_none = [f for f in findings if f["agentic_pattern"] != "none"]
         assert len(non_none) >= 4
 
     def test_uppercase_value_canonicalized(self, findings):
-        """AGENT_COLLUSION in a table cell normalizes to 'agent_collusion'."""
         match = next(f for f in findings if f["id"] == "AG-2")
         assert match["agentic_pattern"] == "agent_collusion"
 
     def test_title_case_value_canonicalized(self, findings):
-        """Trust_Exploitation in a table cell normalizes to 'trust_exploitation'."""
         match = next(f for f in findings if f["id"] == "S-1")
         assert match["agentic_pattern"] == "trust_exploitation"
 
     def test_mixed_case_multiple_canonicalized(self, findings):
-        """'Multiple' in a cell normalizes to 'multiple'."""
         match = next(f for f in findings if f["id"] == "AG-4")
         assert match["agentic_pattern"] == "multiple"
 
     def test_mixed_case_none_canonicalized(self, findings):
-        """'None' in a cell normalizes to 'none'."""
         match = next(f for f in findings if f["id"] == "T-5")
         assert match["agentic_pattern"] == "none"
 
@@ -382,7 +356,6 @@ def test_all_findings_parsed(self, findings):
         assert len(findings) == 3
 
     def test_shifted_column_values_extracted(self, findings):
-        """Pattern column values extract correctly even at the table end."""
         trust_exp = next(f for f in findings if f["id"] == "S-1")
         assert trust_exp["agentic_pattern"] == "trust_exploitation"
 
@@ -412,11 +385,9 @@ def findings(self):
         return parse_threats_findings(content)
 
     def test_all_findings_parsed(self, findings):
-        """Four rows in the fixture table are all parsed."""
         assert len(findings) == 4
 
     def test_every_finding_has_none_pattern(self, findings):
-        """Every em-dash cell canonicalizes to 'none'."""
         for finding in findings:
             assert finding["agentic_pattern"] == "none", (
                 f"finding {finding['id']} expected 'none' got "
@@ -442,33 +413,27 @@ def findings(self):
         return parse_threats_findings(content)
 
     def test_all_findings_parsed(self, findings):
-        """All 5 rows of the pre-Feature-142 fixture parse successfully."""
         assert len(findings) == 5
 
     def test_zero_parse_errors(self, findings):
-        """Parsing produces findings (not an empty list on error)."""
         assert findings, "pre-Feature-142 fixture parsed to empty list"
 
     def test_every_finding_has_pattern_key(self, findings):
-        """Per FR-017, every finding has an ``agentic_pattern`` key populated."""
         for finding in findings:
             assert "agentic_pattern" in finding
 
     def test_every_finding_defaults_to_none(self, findings):
-        """Per FR-017, every finding defaults to 'none' (no Pattern column)."""
         for finding in findings:
             assert finding["agentic_pattern"] == "none", (
                 f"finding {finding['id']} expected default 'none' got "
                 f"{finding['agentic_pattern']!r}"
             )
 
     def test_non_pattern_fields_still_parsed(self, findings):
-        """Existing (non-pattern) fields remain correctly parsed post-FR-017."""
         first = findings[0]
         assert first["id"] == "S-1"
         assert first["component"] == "Auth Service"
         assert first["risk_level"] == "High"
-        # The mitigation field exercises the unchanged Section 7 extraction path
         assert "RS256" in first["mitigation"]
 
     def test_no_warnings_emitted(self, capsys, findings):
diff --git a/tests/scripts/test_pattern_classification_rules.py b/tests/scripts/test_pattern_classification_rules.py
@@ -38,6 +38,7 @@
 from __future__ import annotations
 
 import re
+import sys
 from pathlib import Path
 
 import pytest
@@ -49,6 +50,9 @@
 
 # Repo root resolved from this test file location: tests/scripts/test_*.py
 REPO_ROOT = Path(__file__).resolve().parents[2]
+sys.path.insert(0, str(REPO_ROOT / "scripts"))
+
+from tachi_parsers import VALID_AGENTIC_PATTERNS  # noqa: E402
 
 SHARED_REF_PATH = (
     REPO_ROOT
@@ -61,17 +65,9 @@
 
 FINDING_SCHEMA_PATH = REPO_ROOT / "schemas" / "finding.yaml"
 
-# Six canonical pattern names per data-model.md Entity 2 (matches the
-# ``agentic_pattern`` enum in schemas/finding.yaml minus ``none`` and
-# ``multiple`` sentinels). Ordering matches Section 1 canonical ordering.
-CANONICAL_PATTERNS = {
-    "agent_collusion",
-    "emergent_behavior",
-    "temporal_attack",
-    "trust_exploitation",
-    "communication_vulnerability",
-    "resource_competition",
-}
+# Six canonical pattern names — derived from VALID_AGENTIC_PATTERNS dropping
+# the `none` / `multiple` sentinels. Matches data-model.md Entity 2.
+CANONICAL_PATTERNS = set(VALID_AGENTIC_PATTERNS) - {"none", "multiple"}
 
 # Four component_type tokens per data-model.md Entity 3 Component Type Token
 # List (authoritative finite enumeration; per determinism invariant).
diff --git a/tests/scripts/test_pattern_extraction.py b/tests/scripts/test_pattern_extraction.py
@@ -86,16 +86,11 @@
 # Shared constants
 # ---------------------------------------------------------------------------
 
-# Canonical CSA pattern enum order per data-model.md Entity 6 and
-# maestro-agentic-patterns-shared.md Section 1. Used as the tertiary
-# tiebreak in FR-013 subsection ordering.
-PATTERN_ENUM_ORDER = (
-    "agent_collusion",
-    "emergent_behavior",
-    "temporal_attack",
-    "trust_exploitation",
-    "communication_vulnerability",
-    "resource_competition",
+# Canonical CSA pattern enum order (Section 1 ordering) — derived from the
+# authoritative VALID_AGENTIC_PATTERNS tuple in tachi_parsers, dropping the
+# `none` / `multiple` sentinels. Used as the tertiary tiebreak in FR-013.
+PATTERN_ENUM_ORDER = tuple(
+    p for p in VALID_AGENTIC_PATTERNS if p not in ("none", "multiple")
 )
 
 # Severity ordinal from tachi_parsers.SEVERITY_ORDINAL — reproduced here
diff --git a/tests/scripts/test_pattern_synthesis.py b/tests/scripts/test_pattern_synthesis.py