Skip to content

Commit b6af21c

Browse files
refactor(142): simplify code per /simplify review (#174)
- scripts/tachi_parsers.py: drop duplicate em-dash codepoint from parse_finding_pattern tuple; trim PR-narrative docstring to focus on behavior
- tests/scripts/test_pattern_synthesis.py: replace 206-line hand-rolled YAML parser with yaml.safe_load; add pyyaml to requirements-dev.txt as explicit dev dep (already used by test_pattern_classification_rules.py)
- tests/scripts/test_pattern_extraction.py, test_pattern_classification_rules.py: derive canonical pattern list from tachi_parsers.VALID_AGENTIC_PATTERNS instead of duplicating enum values
- tests/scripts/test_finding_pattern_parser.py: delete ~25 one-line docstrings that restate the test name (KB-027 narrative-comment sprawl); keep docstrings with non-obvious why
- BACKLOG.md: regenerated snapshot

Net: -255 lines. 268/269 tests pass (1 intentional skip).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c0e00c7 commit b6af21c

File tree

7 files changed

+27
-282
lines changed

7 files changed

+27
-282
lines changed

docs/product/_backlog/BACKLOG.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Backlog
22

3-
> Auto-generated from GitHub Issues on 2026-04-16T21:08:50Z.
3+
> Auto-generated from GitHub Issues on 2026-04-16T21:09:47Z.
44
> Source of truth: GitHub Issues with `stage:*` labels.
55
> Regenerate: `/aod.status` or `.aod/scripts/bash/backlog-regenerate.sh`
66
@@ -39,7 +39,7 @@
3939

4040
| # | Title | Delivered | Retro | Updated |
4141
|---|-------|-----------|-------|---------|
42-
| #142 | MAESTRO Phase 3: Agentic threat pattern expansion (Collusion, Emergent Behavior, Temporal, Trust, Communication, Resource) | 2026-04-16 || 2026-04-16 |
42+
| | *No items in this stage* | | |
4343

4444
## Untracked
4545

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
# Install with: pip install -r requirements-dev.txt
33
pytest>=8.0
44
pytest-cov>=4.1
5+
pyyaml>=6.0

scripts/tachi_parsers.py

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -91,33 +91,16 @@ def _parse_int(s: str) -> int:
9191
def parse_finding_pattern(value) -> str:
9292
"""Normalize an ``agentic_pattern`` cell value to a canonical enum string.
9393
94-
Accepts string, None, or missing (empty) input. Returns one of the eight
95-
canonical lowercase values in ``VALID_AGENTIC_PATTERNS``:
96-
``agent_collusion``, ``emergent_behavior``, ``temporal_attack``,
97-
``trust_exploitation``, ``communication_vulnerability``,
98-
``resource_competition``, ``none``, ``multiple``.
99-
100-
Backward compatibility (per FR-017, Feature 142): null, missing,
101-
empty-string, whitespace-only, ``"—"`` (em dash placeholder rendered by
102-
FR-009 for findings with ``agentic_pattern: none``), ``"-"`` (ASCII
103-
dash), and unrecognized strings all collapse to ``"none"``. This lets
104-
pre-Feature-142 threats.md (no Pattern column) parse cleanly with
105-
every finding defaulted to ``none``, and preserves determinism across
106-
schema version skew (per ADR-021 / ADR-026).
107-
108-
Case-insensitive on input: ``Agent_Collusion`` and ``AGENT_COLLUSION``
109-
both normalize to ``agent_collusion``. Canonical storage is always
110-
lowercase.
111-
112-
See: ADR-026 (Hybrid Post-Hoc Synthesis mechanism for Phase 3.6 pattern
113-
classification) and
114-
``.claude/skills/tachi-shared/references/maestro-agentic-patterns-shared.md``
115-
for pattern semantics and the classification rule table.
94+
Accepts string, None, or missing input and returns one of the values in
95+
``VALID_AGENTIC_PATTERNS``. Null, whitespace, em-dash placeholder, ASCII
96+
dash, and unrecognized strings all collapse to ``"none"`` so pre-schema-1.4
97+
inputs without a Pattern column parse cleanly. Case-insensitive; canonical
98+
storage is always lowercase.
11699
"""
117100
if value is None:
118101
return "none"
119102
normalized = str(value).strip().lower()
120-
if not normalized or normalized in ("\u2014", "-", "—"):
103+
if not normalized or normalized in ("\u2014", "-"):
121104
return "none"
122105
if normalized in VALID_AGENTIC_PATTERNS:
123106
return normalized

tests/scripts/test_finding_pattern_parser.py

Lines changed: 3 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -108,33 +108,27 @@ class TestValidAgenticPatternsConstant:
108108
"""
109109

110110
def test_constant_is_exported(self):
111-
"""The module exports VALID_AGENTIC_PATTERNS at the top level."""
112111
import tachi_parsers
113112

114113
assert hasattr(tachi_parsers, "VALID_AGENTIC_PATTERNS")
115114

116115
def test_constant_has_eight_values(self):
117-
"""The constant contains exactly 8 enum values per FR-003."""
118116
assert len(VALID_AGENTIC_PATTERNS) == 8
119117

120118
def test_constant_has_exact_values(self):
121-
"""The constant contains exactly the 8 canonical values (no drift)."""
122119
assert set(VALID_AGENTIC_PATTERNS) == set(CANONICAL_PATTERN_VALUES)
123120

124121
def test_constant_values_are_all_lowercase(self):
125-
"""All enum values are lowercase strings (canonical storage format)."""
126122
for value in VALID_AGENTIC_PATTERNS:
127123
assert isinstance(value, str)
128124
assert value == value.lower(), (
129125
f"VALID_AGENTIC_PATTERNS contains non-lowercase value: {value!r}"
130126
)
131127

132128
def test_constant_has_none_sentinel(self):
133-
"""The constant contains the `none` sentinel for non-pattern findings."""
134129
assert "none" in VALID_AGENTIC_PATTERNS
135130

136131
def test_constant_has_multiple_sentinel(self):
137-
"""The constant contains the `multiple` value for equal-rule matches."""
138132
assert "multiple" in VALID_AGENTIC_PATTERNS
139133

140134

@@ -147,7 +141,6 @@ class TestParseFindingPatternCanonicalValues:
147141

148142
@pytest.mark.parametrize("canonical_value", CANONICAL_PATTERN_VALUES)
149143
def test_canonical_value_returns_itself(self, canonical_value):
150-
"""Each of the 8 canonical values normalizes to itself (identity)."""
151144
assert parse_finding_pattern(canonical_value) == canonical_value
152145

153146

@@ -197,7 +190,6 @@ class TestParseFindingPatternBackwardCompat:
197190
"""
198191

199192
def test_none_input_returns_none_string(self):
200-
"""Python None input returns the string 'none' (not None)."""
201193
assert parse_finding_pattern(None) == "none"
202194

203195
def test_empty_string_returns_none(self):
@@ -210,32 +202,25 @@ def test_tabs_and_newlines_return_none(self):
210202
assert parse_finding_pattern("\t\n \n") == "none"
211203

212204
def test_em_dash_unicode_escape_returns_none(self):
213-
"""U+2014 em-dash via unicode escape — canonical FR-009 placeholder."""
214205
assert parse_finding_pattern("\u2014") == "none"
215206

216207
def test_em_dash_literal_returns_none(self):
217-
"""U+2014 em-dash as a literal character in source returns 'none'."""
218208
assert parse_finding_pattern("—") == "none"
219209

220210
def test_ascii_hyphen_returns_none(self):
221-
"""ASCII hyphen-minus returns 'none' (editor-de-curling tolerance)."""
222211
assert parse_finding_pattern("-") == "none"
223212

224213
def test_unrecognized_string_returns_none(self):
225-
"""Unrecognized strings gracefully degrade to 'none' per FR-017."""
226214
assert parse_finding_pattern("xyz") == "none"
227215

228216
def test_almost_canonical_typo_returns_none(self):
229-
"""A near-miss typo is not silently corrected — returns 'none'."""
230217
assert parse_finding_pattern("agent_collusio") == "none"
231218

232219
def test_non_string_integer_returns_none(self):
233-
"""Non-string input (int) converts + fails validation → 'none'."""
234-
# `str(0).strip().lower()` is "0", which is not in VALID_AGENTIC_PATTERNS
220+
# str(0).strip().lower() is "0", not a canonical value — hits the fall-through.
235221
assert parse_finding_pattern(0) == "none"
236222

237223
def test_em_dash_with_surrounding_whitespace_returns_none(self):
238-
"""Em-dash with surrounding whitespace — strip before comparison."""
239224
assert parse_finding_pattern(" — ") == "none"
240225

241226

@@ -252,11 +237,9 @@ def findings(self):
252237
return parse_threats_findings(content)
253238

254239
def test_all_findings_parsed(self, findings):
255-
"""All 10 rows in the fixture table are parsed."""
256240
assert len(findings) == 10
257241

258242
def test_every_finding_has_pattern_field(self, findings):
259-
"""Every parsed finding has a populated ``agentic_pattern`` field."""
260243
for finding in findings:
261244
assert "agentic_pattern" in finding, (
262245
f"finding {finding.get('id')} missing agentic_pattern key"
@@ -265,7 +248,6 @@ def test_every_finding_has_pattern_field(self, findings):
265248
assert finding["agentic_pattern"] != ""
266249

267250
def test_every_pattern_is_canonical(self, findings):
268-
"""Every emitted agentic_pattern is one of the 8 canonical values."""
269251
for finding in findings:
270252
assert finding["agentic_pattern"] in VALID_AGENTIC_PATTERNS, (
271253
f"finding {finding['id']} has non-canonical pattern "
@@ -277,14 +259,12 @@ def test_trust_exploitation_mapped(self, findings):
277259
assert match["agentic_pattern"] == "trust_exploitation"
278260

279261
def test_agent_collusion_mapped_twice(self, findings):
280-
"""Two separate rows with agent_collusion both parse correctly."""
281262
collusion = [f for f in findings if f["agentic_pattern"] == "agent_collusion"]
282263
assert len(collusion) == 2
283264
ids = {f["id"] for f in collusion}
284265
assert ids == {"AG-2", "AGP-01"}
285266

286267
def test_em_dash_row_parses_as_none(self, findings):
287-
"""The T-3 row with Pattern = `—` parses as `agentic_pattern: 'none'`."""
288268
em_dash_finding = next(f for f in findings if f["id"] == "T-3")
289269
assert em_dash_finding["agentic_pattern"] == "none"
290270

@@ -309,7 +289,7 @@ def test_multiple_sentinel_mapped(self, findings):
309289
assert match["agentic_pattern"] == "multiple"
310290

311291
def test_explicit_none_value_mapped(self, findings):
312-
"""A literal 'none' value in the table parses as 'none' (not '—')."""
292+
# A literal 'none' string must parse to 'none' the same as the em-dash placeholder.
313293
match = next(f for f in findings if f["id"] == "AG-9")
314294
assert match["agentic_pattern"] == "none"
315295

@@ -331,30 +311,24 @@ def findings(self):
331311
return parse_threats_findings(content)
332312

333313
def test_agentic_pattern_header_detected(self, findings):
334-
"""The 'Agentic Pattern' spelling is detected (FR-009 canonical)."""
335314
# If header detection failed, every finding would default to 'none'.
336-
# We have 4 non-none rows in this fixture, so at least one must be
337-
# non-none for detection to have worked.
315+
# The fixture has 4 non-none rows, so any detection failure collapses this count.
338316
non_none = [f for f in findings if f["agentic_pattern"] != "none"]
339317
assert len(non_none) >= 4
340318

341319
def test_uppercase_value_canonicalized(self, findings):
342-
"""AGENT_COLLUSION in a table cell normalizes to 'agent_collusion'."""
343320
match = next(f for f in findings if f["id"] == "AG-2")
344321
assert match["agentic_pattern"] == "agent_collusion"
345322

346323
def test_title_case_value_canonicalized(self, findings):
347-
"""Trust_Exploitation in a table cell normalizes to 'trust_exploitation'."""
348324
match = next(f for f in findings if f["id"] == "S-1")
349325
assert match["agentic_pattern"] == "trust_exploitation"
350326

351327
def test_mixed_case_multiple_canonicalized(self, findings):
352-
"""'Multiple' in a cell normalizes to 'multiple'."""
353328
match = next(f for f in findings if f["id"] == "AG-4")
354329
assert match["agentic_pattern"] == "multiple"
355330

356331
def test_mixed_case_none_canonicalized(self, findings):
357-
"""'None' in a cell normalizes to 'none'."""
358332
match = next(f for f in findings if f["id"] == "T-5")
359333
assert match["agentic_pattern"] == "none"
360334

@@ -382,7 +356,6 @@ def test_all_findings_parsed(self, findings):
382356
assert len(findings) == 3
383357

384358
def test_shifted_column_values_extracted(self, findings):
385-
"""Pattern column values extract correctly even at the table end."""
386359
trust_exp = next(f for f in findings if f["id"] == "S-1")
387360
assert trust_exp["agentic_pattern"] == "trust_exploitation"
388361

@@ -412,11 +385,9 @@ def findings(self):
412385
return parse_threats_findings(content)
413386

414387
def test_all_findings_parsed(self, findings):
415-
"""Four rows in the fixture table are all parsed."""
416388
assert len(findings) == 4
417389

418390
def test_every_finding_has_none_pattern(self, findings):
419-
"""Every em-dash cell canonicalizes to 'none'."""
420391
for finding in findings:
421392
assert finding["agentic_pattern"] == "none", (
422393
f"finding {finding['id']} expected 'none' got "
@@ -442,33 +413,27 @@ def findings(self):
442413
return parse_threats_findings(content)
443414

444415
def test_all_findings_parsed(self, findings):
445-
"""All 5 rows of the pre-Feature-142 fixture parse successfully."""
446416
assert len(findings) == 5
447417

448418
def test_zero_parse_errors(self, findings):
449-
"""Parsing produces findings (not an empty list on error)."""
450419
assert findings, "pre-Feature-142 fixture parsed to empty list"
451420

452421
def test_every_finding_has_pattern_key(self, findings):
453-
"""Per FR-017, every finding has an ``agentic_pattern`` key populated."""
454422
for finding in findings:
455423
assert "agentic_pattern" in finding
456424

457425
def test_every_finding_defaults_to_none(self, findings):
458-
"""Per FR-017, every finding defaults to 'none' (no Pattern column)."""
459426
for finding in findings:
460427
assert finding["agentic_pattern"] == "none", (
461428
f"finding {finding['id']} expected default 'none' got "
462429
f"{finding['agentic_pattern']!r}"
463430
)
464431

465432
def test_non_pattern_fields_still_parsed(self, findings):
466-
"""Existing (non-pattern) fields remain correctly parsed post-FR-017."""
467433
first = findings[0]
468434
assert first["id"] == "S-1"
469435
assert first["component"] == "Auth Service"
470436
assert first["risk_level"] == "High"
471-
# The mitigation field exercises the unchanged Section 7 extraction path
472437
assert "RS256" in first["mitigation"]
473438

474439
def test_no_warnings_emitted(self, capsys, findings):

tests/scripts/test_pattern_classification_rules.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from __future__ import annotations
3939

4040
import re
41+
import sys
4142
from pathlib import Path
4243

4344
import pytest
@@ -49,6 +50,9 @@
4950

5051
# Repo root resolved from this test file location: tests/scripts/test_*.py
5152
REPO_ROOT = Path(__file__).resolve().parents[2]
53+
sys.path.insert(0, str(REPO_ROOT / "scripts"))
54+
55+
from tachi_parsers import VALID_AGENTIC_PATTERNS # noqa: E402
5256

5357
SHARED_REF_PATH = (
5458
REPO_ROOT
@@ -61,17 +65,9 @@
6165

6266
FINDING_SCHEMA_PATH = REPO_ROOT / "schemas" / "finding.yaml"
6367

64-
# Six canonical pattern names per data-model.md Entity 2 (matches the
65-
# ``agentic_pattern`` enum in schemas/finding.yaml minus ``none`` and
66-
# ``multiple`` sentinels). Ordering matches Section 1 canonical ordering.
67-
CANONICAL_PATTERNS = {
68-
"agent_collusion",
69-
"emergent_behavior",
70-
"temporal_attack",
71-
"trust_exploitation",
72-
"communication_vulnerability",
73-
"resource_competition",
74-
}
68+
# Six canonical pattern names — derived from VALID_AGENTIC_PATTERNS dropping
69+
# the `none` / `multiple` sentinels. Matches data-model.md Entity 2.
70+
CANONICAL_PATTERNS = set(VALID_AGENTIC_PATTERNS) - {"none", "multiple"}
7571

7672
# Four component_type tokens per data-model.md Entity 3 Component Type Token
7773
# List (authoritative finite enumeration; per determinism invariant).

tests/scripts/test_pattern_extraction.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,11 @@
8686
# Shared constants
8787
# ---------------------------------------------------------------------------
8888

89-
# Canonical CSA pattern enum order per data-model.md Entity 6 and
90-
# maestro-agentic-patterns-shared.md Section 1. Used as the tertiary
91-
# tiebreak in FR-013 subsection ordering.
92-
PATTERN_ENUM_ORDER = (
93-
"agent_collusion",
94-
"emergent_behavior",
95-
"temporal_attack",
96-
"trust_exploitation",
97-
"communication_vulnerability",
98-
"resource_competition",
89+
# Canonical CSA pattern enum order (Section 1 ordering) — derived from the
90+
# authoritative VALID_AGENTIC_PATTERNS tuple in tachi_parsers, dropping the
91+
# `none` / `multiple` sentinels. Used as the tertiary tiebreak in FR-013.
92+
PATTERN_ENUM_ORDER = tuple(
93+
p for p in VALID_AGENTIC_PATTERNS if p not in ("none", "multiple")
9994
)
10095

10196
# Severity ordinal from tachi_parsers.SEVERITY_ORDINAL — reproduced here

0 commit comments

Comments
 (0)