Skip to content

Commit a418f61

Browse files
committed
feat: remove composer stage from doc review strategy
1 parent 464b351 commit a418f61

File tree

1 file changed

+91
-190
lines changed

1 file changed

+91
-190
lines changed

src/orchestration/strategies/doc_review.py

Lines changed: 91 additions & 190 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,8 @@
88
reports/doc-review/raw/REPORT_{slug}__r{n}.md and commits once.
99
2) Validators (streamed, 1:1): run on each reviewer branch, strictly validate
1010
and refine the report in place, committing once.
11-
3) Composer (global): after all validators complete, combine all validated
12-
reports into a single final reports/doc-review/REPORT.md on one branch.
13-
14-
Merging model: no git merges; the composer reads validated reports via git show
15-
from their branches. To keep runner isolation, we inject the file contents into
16-
the composer prompt (no workspace injection required).
11+
Note: The composer stage has been removed. This strategy returns the
12+
successful validator results; downstream tooling can aggregate reports if needed.
1713
1814
Minimal configuration:
1915
- pages_file (YAML or JSON): list of {title, path, slug?}
@@ -36,7 +32,7 @@
3632
from .base import Strategy, StrategyConfig
3733

3834
if TYPE_CHECKING:
39-
from ..strategy_context import StrategyContext, Handle
35+
from ..strategy_context import StrategyContext
4036

4137

4238
@dataclass
@@ -46,13 +42,21 @@ class DocReviewConfig(StrategyConfig):
4642
pages_file: str = "" # YAML or JSON list of pages
4743
reviewers_per_page: int = 1
4844
report_dir: str = "reports/doc-review"
45+
# Unified retry controls (retries = additional attempts after the first)
46+
reviewer_max_retries: int = 3
47+
validator_max_retries: int = 3
4948

5049
def validate(self) -> None:
5150
super().validate()
5251
if not self.pages_file:
5352
raise ValueError("pages_file is required for doc-review strategy")
5453
if self.reviewers_per_page < 1:
5554
raise ValueError("reviewers_per_page must be at least 1")
55+
if self.reviewer_max_retries < 0:
56+
raise ValueError("reviewer_max_retries must be >= 0")
57+
if self.validator_max_retries < 0:
58+
raise ValueError("validator_max_retries must be >= 0")
59+
# no composer
5660

5761

5862
class DocReviewStrategy(Strategy):
@@ -102,13 +106,16 @@ async def execute(
102106
except Exception:
103107
pass
104108

105-
# Stage 1: reviewers
106-
reviewer_handles: List[Tuple[Dict[str, Any], int, Handle]] = (
107-
[]
108-
) # (page, r_idx, handle)
109-
for page in normalized_pages:
110-
for r_idx in range(1, int(cfg.reviewers_per_page) + 1):
111-
report_rel = f"{cfg.report_dir}/raw/REPORT_{page['slug']}__r{r_idx}.md"
109+
# Unified helpers ---------------------------------------------------
110+
111+
async def _run_reviewer_with_retries(
112+
page: Dict[str, Any], r_idx: int
113+
) -> Optional[InstanceResult]:
114+
report_rel = f"{cfg.report_dir}/raw/REPORT_{page['slug']}__r{r_idx}.md"
115+
attempts = cfg.reviewer_max_retries + 1
116+
last_result: Optional[InstanceResult] = None
117+
for i in range(attempts):
118+
corrective = i > 0
112119
review_prompt = _build_reviewer_prompt(
113120
page_title=page["title"],
114121
page_path=page["path"],
@@ -118,118 +125,86 @@ async def execute(
118125
"prompt": review_prompt,
119126
"base_branch": base_branch,
120127
"model": cfg.model,
121-
# The reviewer should create the report file and commit it
122128
}
123-
key = ctx.key("page", page["slug"], f"r{r_idx}", "review")
124-
handle = await ctx.run(task, key=key)
125-
reviewer_handles.append((page, r_idx, handle))
126-
127-
# As reviewers finish, schedule validators immediately
128-
validator_handles: List[Tuple[Dict[str, Any], int, Handle]] = []
129-
# Map instance_id -> (page, r_idx)
130-
reviewer_map: Dict[str, Tuple[Dict[str, Any], int]] = {
131-
h.instance_id: (pg, idx) for (pg, idx, h) in reviewer_handles
132-
}
133-
134-
async def _wait_and_validate(
135-
handle: Handle,
136-
) -> Optional[Tuple[Dict[str, Any], int, Handle, InstanceResult]]:
137-
page, r_idx = reviewer_map[handle.instance_id]
138-
result: InstanceResult
139-
try:
140-
result = await ctx.wait(handle)
141-
except Exception:
142-
# Reviewer failed
143-
return None
144-
145-
# Guardrail: ensure report exists in branch
129+
key_parts = ["page", page["slug"], f"r{r_idx}", "review"]
130+
if corrective:
131+
key_parts.append(f"attempt-{i}")
132+
key = ctx.key(*key_parts)
133+
try:
134+
h = await ctx.run(task, key=key)
135+
result = await ctx.wait(h)
136+
except Exception:
137+
result = None # Treat as failed attempt, continue
138+
if result:
139+
last_result = result
140+
ok = _verify_file_in_branch(
141+
repo_path, result.branch_name, report_rel
142+
)
143+
if result.success and result.branch_name and ok:
144+
return result
145+
return last_result if (last_result and last_result.success) else None
146+
147+
async def _run_validator_with_retries(
148+
page: Dict[str, Any], r_idx: int, reviewer_branch: str
149+
) -> Optional[InstanceResult]:
146150
report_rel = f"{cfg.report_dir}/raw/REPORT_{page['slug']}__r{r_idx}.md"
147-
ok = _verify_file_in_branch(repo_path, result.branch_name, report_rel)
148-
if not (result.success and result.branch_name and ok):
149-
# No corrective retries; rely on strict prompt to ensure commit
150-
return None
151-
152-
# Schedule validator on the reviewer branch
153-
validator_prompt = _build_validator_prompt(
154-
page_title=page["title"],
155-
page_path=page["path"],
156-
report_path=report_rel,
157-
)
158-
v_task = {
159-
"prompt": validator_prompt,
160-
"base_branch": result.branch_name or base_branch,
161-
"model": cfg.model,
162-
}
163-
v_key = ctx.key("page", page["slug"], f"r{r_idx}", "validate")
164-
v_handle = await ctx.run(v_task, key=v_key)
165-
return (page, r_idx, v_handle, result)
166-
167-
# Kick off waits concurrently
168-
wait_tasks = [
169-
asyncio.create_task(_wait_and_validate(h)) for (_, _, h) in reviewer_handles
170-
]
171-
for coro in asyncio.as_completed(wait_tasks):
172-
res = await coro
173-
if res is None:
174-
continue
175-
page, r_idx, v_handle, _ = res
176-
validator_handles.append((page, r_idx, v_handle))
177-
178-
# Stage 2: wait for all validators and verify outputs
179-
validated_reports: List[Tuple[Dict[str, Any], int, InstanceResult]] = []
180-
181-
async def _await_validator(
182-
entry: Tuple[Dict[str, Any], int, Handle],
151+
attempts = cfg.validator_max_retries + 1
152+
last_vres: Optional[InstanceResult] = None
153+
for i in range(attempts):
154+
corrective = i > 0
155+
validator_prompt = _build_validator_prompt(
156+
page_title=page["title"],
157+
page_path=page["path"],
158+
report_path=report_rel,
159+
)
160+
v_task = {
161+
"prompt": validator_prompt,
162+
"base_branch": reviewer_branch,
163+
"model": cfg.model,
164+
}
165+
v_key_parts = ["page", page["slug"], f"r{r_idx}", "validate"]
166+
if corrective:
167+
v_key_parts.append(f"attempt-{i}")
168+
v_key = ctx.key(*v_key_parts)
169+
try:
170+
vh = await ctx.run(v_task, key=v_key)
171+
vres = await ctx.wait(vh)
172+
except Exception:
173+
vres = None
174+
if vres:
175+
last_vres = vres
176+
ok = _verify_file_in_branch(repo_path, vres.branch_name, report_rel)
177+
if vres.success and vres.branch_name and ok:
178+
return vres
179+
return last_vres if (last_vres and last_vres.success) else None
180+
181+
# (composer stage removed)
182+
183+
# Stage 1+2: Run reviewers with retries and chain validators with retries
184+
async def _review_then_validate(
185+
page: Dict[str, Any], r_idx: int
183186
) -> Optional[Tuple[Dict[str, Any], int, InstanceResult]]:
184-
page, r_idx, handle = entry
185-
report_rel = f"{cfg.report_dir}/raw/REPORT_{page['slug']}__r{r_idx}.md"
186-
try:
187-
vres = await ctx.wait(handle)
188-
except Exception:
189-
# No corrective retries
187+
rres = await _run_reviewer_with_retries(page, r_idx)
188+
if not (rres and rres.success and rres.branch_name):
190189
return None
191-
192-
ok = _verify_file_in_branch(repo_path, vres.branch_name, report_rel)
193-
if not (vres.success and vres.branch_name and ok):
190+
vres = await _run_validator_with_retries(page, r_idx, rres.branch_name)
191+
if not (vres and vres.success and vres.branch_name):
194192
return None
195193
return (page, r_idx, vres)
196194

197-
v_waits = [asyncio.create_task(_await_validator(v)) for v in validator_handles]
198-
for coro in asyncio.as_completed(v_waits):
195+
tasks = []
196+
for page in normalized_pages:
197+
for r_idx in range(1, int(cfg.reviewers_per_page) + 1):
198+
tasks.append(asyncio.create_task(_review_then_validate(page, r_idx)))
199+
200+
validated_reports: List[Tuple[Dict[str, Any], int, InstanceResult]] = []
201+
for coro in asyncio.as_completed(tasks):
199202
r = await coro
200203
if r is not None:
201204
validated_reports.append(r)
202205

203-
# Stage 3: composer
204-
# Collect all validated report contents via git show and inject into composer prompt
205-
inputs: List[Tuple[str, str, str]] = [] # (slug, title, content)
206-
for page, r_idx, vres in validated_reports:
207-
report_rel = f"{cfg.report_dir}/raw/REPORT_{page['slug']}__r{r_idx}.md"
208-
content = _read_file_from_branch(repo_path, vres.branch_name, report_rel)
209-
if content:
210-
inputs.append((page["slug"], page["title"], content))
211-
212-
composer_prompt = _build_composer_prompt(inputs, cfg.report_dir)
213-
compose_task = {
214-
"prompt": composer_prompt,
215-
"base_branch": base_branch,
216-
"model": cfg.model,
217-
}
218-
c_key = ctx.key("compose")
219-
c_handle = await ctx.run(compose_task, key=c_key)
220-
c_result = await ctx.wait(c_handle)
221-
222-
# Verify final report exists
223-
final_rel = f"{cfg.report_dir}/REPORT.md"
224-
c_ok = _verify_file_in_branch(repo_path, c_result.branch_name, final_rel)
225-
if not (c_result.success and c_result.branch_name and c_ok):
226-
# Best effort: return result but mark as failed
227-
c_result.success = False
228-
c_result.status = "failed"
229-
c_result.error = c_result.error or "Final report missing or not committed"
230-
231-
# Return only the final composer result for strategy output
232-
return [c_result]
206+
# Composer removed: return successful validator results directly
207+
return [vres for (_page, _idx, vres) in validated_reports]
233208

234209

235210
# ---------------------------
@@ -309,21 +284,6 @@ def _verify_file_in_branch(repo: Path, branch: Optional[str], rel_path: str) ->
309284
return False
310285

311286

312-
def _read_file_from_branch(
313-
repo: Path, branch: Optional[str], rel_path: str
314-
) -> Optional[str]:
315-
if not branch:
316-
return None
317-
try:
318-
cmd = ["git", "-C", str(repo), "show", f"{branch}:{rel_path}"]
319-
r = subprocess.run(cmd, capture_output=True, text=True)
320-
if r.returncode == 0:
321-
return r.stdout
322-
except Exception:
323-
return None
324-
return None
325-
326-
327287
def _build_reviewer_prompt(
328288
*,
329289
page_title: str,
@@ -474,63 +434,4 @@ def _build_validator_prompt(
474434
f"</commit_requirements>\n"
475435
)
476436

477-
478-
def _build_composer_prompt(inputs: List[Tuple[str, str, str]], report_dir: str) -> str:
479-
"""
480-
inputs: List of tuples (slug, title, content) where content is the FULL validated report text for that page.
481-
"""
482-
header = (
483-
f"<role>\n"
484-
f"You are composing a single, final user-facing documentation defect report by merging multiple validated per-page reports. "
485-
f"Your job is to deduplicate, standardize, and sequence findings while preserving precision and the required format.\n"
486-
f"</role>\n\n"
487-
f"<target>\n"
488-
f"OUTPUT FILE (required): {report_dir}/REPORT.md\n"
489-
f"</target>\n\n"
490-
f"<scope>\n"
491-
f"Include only user-facing issues. Exclude any items about repository internals, build/CI, docs engine internals, or anything not visible to readers.\n"
492-
f"</scope>\n\n"
493-
f"<truth_sources>\n"
494-
f"Do not invent new domain facts while merging. When findings conflict, prefer the most conservative reading or mark that a domain decision is required within the description. "
495-
f"The glossary remains canonical; if absent or silent, prefer the dominant/most recent usage across the docs.\n"
496-
f"</truth_sources>\n\n"
497-
f"<workflow>\n"
498-
f"1) Read all source reports in full.\n"
499-
f"2) Deduplicate: if two items are materially the same, keep one with the clearest description and best citation; fold any unique details from the duplicate into the survivor.\n"
500-
f"3) Normalize: align terminology, citation style, tone, and fix proposals across items.\n"
501-
f"4) Sequence: produce a single continuous numbering from 1..N. Grouping is not allowed in the output; only a flat list.\n"
502-
f"5) Output a single file at the required path and commit it once.\n"
503-
f"</workflow>\n\n"
504-
f"<report_format>\n"
505-
f"Keep this exact format for every finding:\n"
506-
f"- [ ] **<number>. <Short Precise Title>**\n\n"
507-
f"<repo-relative path or stable URL with optional line anchors>\n\n"
508-
f"<Detailed description of the issue and the correct version or fix. Include cross-doc citations as repo-relative paths with anchors. "
509-
f"If the entry relies on general knowledge for a basic rule, say so in one short sentence. If a domain decision is required, state it explicitly.>\n\n"
510-
f"---\n\n"
511-
f"</report_format>\n\n"
512-
f"<constraints>\n"
513-
f"- Produce exactly one file: REPORT.md at the specified path.\n"
514-
f"- Do not modify any other files.\n"
515-
f"- Make a single commit.\n"
516-
f"</constraints>\n\n"
517-
f"<persistence>\n"
518-
f"Keep going until the final merged report exists at the required path with continuous numbering and the single commit is made.\n"
519-
f"</persistence>\n\n"
520-
f"<commit_requirements>\n"
521-
f"CRITICAL: This run is considered failed unless you add and commit the final report file. "
522-
f"Uncommitted workspace changes are ignored by orchestration.\n"
523-
f"- Required file: {report_dir}/REPORT.md\n"
524-
f'- Commit exactly once after composing: git add {report_dir}/REPORT.md && git commit -m "doc-review: compose final report"\n'
525-
f"</commit_requirements>\n\n"
526-
)
527-
528-
# Concatenate inputs; include a clear, minimal provenance marker before each source
529-
body_parts = []
530-
for slug, title, content in inputs:
531-
body_parts.append(f"# Source Report: {slug} - {title}\n\n{content}\n")
532-
sources = (
533-
"".join(body_parts) if body_parts else "No validated reports were provided.\n"
534-
)
535-
536-
return header + sources
437+
# composer removed

0 commit comments

Comments
 (0)