Skip to content

Commit 9c5a208

Browse files
davidmatousekclaude
andcommitted
fix: fall back to architecture.md H1 when threats.md lacks project name
The orchestrator output template writes a literal `# Threat Model Report` H1 that matches neither of `parse_project_name()`'s accepted formats (`# {Name} Threat Model` or `# Threat Model: {Name}`), causing every PDF cover page and infographic to read "Unknown Project" unless `--title` was passed explicitly.

Add an architecture.md H1 fallback that runs after threats.md parsing fails. Feature 120 already snapshots architecture.md into each run's output directory, so the fallback is reliable for real pipeline runs.

Supports the em-dash formats observed in the wild:
- `# {Name} — Architecture` (example convention)
- `# Security Architecture — {Name}` (user projects)
- `# Architecture — {Name}`

Precedence: `--title` override > threats.md H1 > architecture.md H1 > "Unknown Project".

Regenerated the 2 baseline PDFs whose examples ship architecture.md (web-app, microservices) under SOURCE_DATE_EPOCH=1700000000 per ADR-021. The 3 examples without architecture.md (ascii-web-api, mermaid-agentic-app, free-text-microservice) still resolve to "Unknown Project" and their baselines are byte-identical.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6687df6 commit 9c5a208

File tree

6 files changed

+1558
-1445
lines changed

6 files changed

+1558
-1445
lines changed

examples/microservices/security-report.pdf.baseline

Lines changed: 1080 additions & 1087 deletions
Large diffs are not rendered by default.

examples/web-app/security-report.pdf.baseline

Lines changed: 315 additions & 341 deletions
Large diffs are not rendered by default.

scripts/extract-infographic-data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1576,7 +1576,7 @@ def main():
15761576

15771577
# Parse frontmatter and project name
15781578
frontmatter = parse_frontmatter(threats_content)
1579-
project_name = parse_project_name(threats_content)
1579+
project_name = parse_project_name(threats_content, target_dir=target_dir)
15801580

15811581
# Parse baseline metadata for delta-aware output
15821582
baseline = parse_baseline_frontmatter(threats_content)

scripts/extract-report-data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1591,7 +1591,7 @@ def main():
15911591
frontmatter = parse_frontmatter(threats_content)
15921592

15931593
# Parse project name
1594-
project_name = parse_project_name(threats_content, args.title)
1594+
project_name = parse_project_name(threats_content, args.title, target_dir)
15951595

15961596
# Parse baseline metadata for delta-aware output
15971597
baseline = parse_baseline_frontmatter(threats_content)

scripts/tachi_parsers.py

Lines changed: 63 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -286,36 +286,84 @@ def parse_baseline_frontmatter(content: str) -> dict:
286286
# Project Name Parser
287287
# =============================================================================
288288

289-
def parse_project_name(content: str, title_override: str = None) -> str:
290-
"""Extract project name from threats.md H1 heading.
291-
292-
Supports two heading formats:
293-
- "# {Name} Threat Model" (orchestrator output format)
294-
- "# Threat Model: {Name}" (legacy format)
295-
296-
Args:
297-
content: threats.md content.
298-
title_override: If provided, use this instead of auto-detected name.
299-
300-
Returns:
301-
Project name string.
289+
def parse_project_name(
    content: str,
    title_override: str = None,
    target_dir: Path = None,
) -> str:
    """Extract project name from threats.md H1, with architecture.md fallback.

    Precedence:
        1. ``title_override`` if provided (CLI --title wins)
        2. threats.md H1 in one of the two recognized formats
        3. architecture.md H1 in ``target_dir`` (snapshot from Feature 120)
        4. ``"Unknown Project"`` fallback

    Recognized threats.md H1 formats:
        - ``# {Name} Threat Model``
        - ``# Threat Model: {Name}``

    Recognized architecture.md H1 formats (em-dash separated):
        - ``# {Name} — Architecture`` (example convention)
        - ``# Security Architecture — {Name}`` / ``# Architecture — {Name}``

    The current orchestrator output template writes a literal
    ``# Threat Model Report`` H1, which matches neither threats.md format, so
    the architecture.md fallback recovers the name for real pipeline runs that
    snapshot architecture.md alongside threats.md.

    Args:
        content: threats.md content.
        title_override: If truthy, returned as-is instead of any detected name.
        target_dir: Directory searched for ``architecture.md``. ``None``
            disables the architecture.md fallback.

    Returns:
        Project name string; never empty.
    """
    if title_override:
        return title_override

    # Format 1: "# {Name} Threat Model"
    match = re.search(r"^#\s+(.+?)\s+Threat Model\s*$", content, re.MULTILINE)
    if match:
        return match.group(1).strip()

    # Format 2: "# Threat Model: {Name}"
    match = re.search(r"^#\s+Threat Model:\s*(.+)$", content, re.MULTILINE)
    if match:
        return match.group(1).strip()

    if target_dir is not None:
        arch_name = _parse_architecture_project_name(target_dir)
        if arch_name:
            return arch_name

    return "Unknown Project"


def _parse_architecture_project_name(target_dir: Path):
    """Extract project name from architecture.md H1 in ``target_dir``.

    Returns None when architecture.md is absent, is not a regular file, cannot
    be read, is not valid UTF-8, or has no parseable H1 in the recognized
    em-dash formats. This is a best-effort fallback, so it never raises for a
    bad or missing file.
    """
    # Path() is a no-op for Path inputs and lets callers pass a plain string.
    arch_path = Path(target_dir) / "architecture.md"

    try:
        # Attempt the read directly instead of pre-checking with is_file():
        # a missing file, a directory, and a permission problem all surface
        # as OSError subclasses. "utf-8-sig" transparently drops a leading
        # BOM, which would otherwise sit in front of the "#" and defeat the
        # H1 regex below.
        arch_content = arch_path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # named explicitly for a non-UTF-8 file to count as "unreadable".
        return None

    # Only the first H1 in the file is considered.
    match = re.search(r"^#\s+(.+)$", arch_content, re.MULTILINE)
    if not match:
        return None

    heading = match.group(1).strip()
    # The separator is an em dash with a space on each side; a plain hyphen
    # is deliberately not accepted.
    parts = [p.strip() for p in heading.split(" — ")]
    if len(parts) != 2:
        return None

    left, right = parts
    if left.lower() in ("architecture", "security architecture"):
        return right or None
    if right.lower() == "architecture":
        return left or None

    return None
365+
366+
319367
# =============================================================================
320368
# Artifact Detection
321369
# =============================================================================
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""Unit tests for ``parse_project_name`` in ``scripts/tachi_parsers.py``.
2+
3+
Covers precedence ordering (title override > threats.md H1 > architecture.md
4+
fallback > "Unknown Project"), both threats.md formats, both architecture.md
5+
em-dash formats, and edge cases (missing file, unreadable file, malformed H1).
6+
"""
7+
8+
import importlib.util
9+
import sys
10+
from pathlib import Path
11+
12+
import pytest
13+
14+
15+
# Repository root, resolved as the directory two levels above this test
# file's own directory.
REPO_ROOT = Path(__file__).resolve().parents[2]
PARSER_PATH = REPO_ROOT.joinpath("scripts", "tachi_parsers.py")


def _load_parser_module():
    """Import ``scripts/tachi_parsers.py`` by file path and return the module.

    The module object is registered in ``sys.modules`` under the name
    ``tachi_parsers`` before its code is executed.
    """
    module_spec = importlib.util.spec_from_file_location("tachi_parsers", PARSER_PATH)
    parser_module = importlib.util.module_from_spec(module_spec)
    sys.modules["tachi_parsers"] = parser_module
    module_spec.loader.exec_module(parser_module)
    return parser_module


# Loaded once at import time; the tests below call the function through this
# module-level alias.
tachi_parsers = _load_parser_module()
parse_project_name = tachi_parsers.parse_project_name
29+
30+
31+
class TestTitleOverride:
    """An explicit title beats every auto-detected source; an empty one does not."""

    def test_title_override_wins_over_all_sources(self, tmp_path):
        # Write the fixture as UTF-8 explicitly: write_text() otherwise uses
        # the platform's locale encoding, which may encode the em dash
        # differently from the UTF-8 the parser reads back.
        (tmp_path / "architecture.md").write_text(
            "# Alpha — Architecture\n", encoding="utf-8"
        )
        content = "# Beta Threat Model\n"
        assert parse_project_name(
            content, title_override="Gamma", target_dir=tmp_path
        ) == "Gamma"

    def test_empty_title_override_falls_through(self):
        # An empty string is falsy, so detection proceeds to the threats.md H1.
        assert parse_project_name("# Alpha Threat Model\n", title_override="") == "Alpha"
41+
42+
43+
class TestThreatsMdFormats:
    """Both recognized threats.md H1 layouts yield the embedded project name."""

    def test_format_orchestrator_output(self):
        detected = parse_project_name("# Alpha Threat Model\n")
        assert detected == "Alpha"

    def test_format_legacy_colon(self):
        detected = parse_project_name("# Threat Model: Beta\n")
        assert detected == "Beta"

    def test_multiword_name(self):
        detected = parse_project_name("# Web Application Threat Model\n")
        assert detected == "Web Application"

    def test_name_with_hyphens(self):
        detected = parse_project_name("# Threat Model: second-brain-mcp\n")
        assert detected == "second-brain-mcp"
55+
56+
57+
class TestArchitectureMdFallback:
    """architecture.md H1 is consulted only after threats.md yields no name."""

    # threats.md content whose H1 matches neither recognized threats.md
    # format, so every test using it exercises the fallback path.
    THREATS_WITHOUT_NAME = "# Threat Model Report\n"

    @staticmethod
    def _write_architecture(directory, text):
        # Always write UTF-8: write_text() otherwise uses the platform's
        # locale encoding, which may encode the em dash differently from the
        # UTF-8 the parser reads back.
        (directory / "architecture.md").write_text(text, encoding="utf-8")

    def test_name_before_architecture_suffix(self, tmp_path):
        self._write_architecture(tmp_path, "# Web Application — Architecture\n")
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "Web Application"

    def test_name_after_security_architecture_prefix(self, tmp_path):
        self._write_architecture(tmp_path, "# Security Architecture — second-brain-mcp\n")
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "second-brain-mcp"

    def test_name_after_plain_architecture_prefix(self, tmp_path):
        self._write_architecture(tmp_path, "# Architecture — my-service\n")
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "my-service"

    def test_no_architecture_file_falls_back_to_unknown(self, tmp_path):
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "Unknown Project"

    def test_architecture_path_is_directory_falls_back_to_unknown(self, tmp_path):
        # A directory named architecture.md is not a readable file.
        (tmp_path / "architecture.md").mkdir()
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "Unknown Project"

    def test_target_dir_none_skips_fallback(self):
        assert parse_project_name(self.THREATS_WITHOUT_NAME) == "Unknown Project"

    def test_architecture_without_em_dash_is_ignored(self, tmp_path):
        self._write_architecture(tmp_path, "# Plain Heading\n")
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "Unknown Project"

    def test_architecture_with_hyphen_instead_of_em_dash_is_ignored(self, tmp_path):
        self._write_architecture(tmp_path, "# Web Application - Architecture\n")
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "Unknown Project"

    def test_threats_h1_wins_over_architecture(self, tmp_path):
        self._write_architecture(tmp_path, "# Alpha — Architecture\n")
        content = "# Beta Threat Model\n"
        assert parse_project_name(content, target_dir=tmp_path) == "Beta"

    def test_architecture_with_extra_whitespace(self, tmp_path):
        self._write_architecture(tmp_path, "# Web Application — Architecture \n")
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "Web Application"

    def test_architecture_h1_must_be_first_heading(self, tmp_path):
        # When several H1s are present, the first one is the one parsed;
        # later H1s do not affect the result.
        self._write_architecture(
            tmp_path, "# Web Application — Architecture\n\n# Another Heading\n"
        )
        assert parse_project_name(self.THREATS_WITHOUT_NAME, target_dir=tmp_path) == "Web Application"

0 commit comments

Comments
 (0)