Skip to content

Commit 2996457

Browse files
fix: harden live contracts and resume semantics
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
1 parent 509f4f7 commit 2996457

File tree

7 files changed: 101 additions and 21 deletions.

scripts/innovation_loop.py

Lines changed: 71 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
ensure_repo_bootstrap_for_dvc,
3535
ensure_controller_not_running,
3636
load_goal,
37+
load_pending_result,
3738
load_yaml_like,
3839
opencode_agent_model,
3940
proposal_round_path,
@@ -498,6 +499,7 @@ def build_live_proposal_prompt(
498499
cooldowns: dict,
499500
result_packet: dict,
500501
research_context: dict,
502+
primary_proposal: dict | None = None,
501503
) -> str:
502504
if os.environ.get("INNOVATION_LOOP_LIVE_TEST_MODE") == "1":
503505
scripted_choice = "objective" if agent == "Apollo" else "architecture"
@@ -520,7 +522,7 @@ def build_live_proposal_prompt(
520522
"minimal_ablation": ["revert the single scripted change"],
521523
"paper_grounding": grounding,
522524
"redirect_if_underperforming": "切换到正交机制轴并停止重复当前主路线",
523-
"causal_metric_path": "若该动作有效,中间稳定性指标应先改善,再传导到目标指标。",
525+
"causal_metric_path": ["intermediate_stability", "target_metric"],
524526
"failure_signature": "若中间指标不变而目标指标波动,则说明当前机制解释站不住。",
525527
"pivot_after_failure": "切换到正交机制轴并停止重复当前主路线",
526528
},
@@ -547,6 +549,13 @@ def build_live_proposal_prompt(
547549
"research_context": prompt_ready_research_context(research_context, agent),
548550
"paper_grounding_seed": build_paper_grounding(research_context, agent),
549551
}
552+
if primary_proposal is not None:
553+
context["apollo_proposal"] = {
554+
"choice": primary_proposal.get("choice"),
555+
"family": primary_proposal.get("family"),
556+
"mechanism": primary_proposal.get("mechanism"),
557+
"paper_grounding": primary_proposal.get("paper_grounding", []),
558+
}
550559
return f"""
551560
Return exactly one JSON object and nothing else.
552561
@@ -573,6 +582,7 @@ def build_live_proposal_prompt(
573582
- avoid cooldown families when possible
574583
- keep the reply short
575584
- paper_grounding must contain at least two unique paper_id values from the evidence pack
585+
- if Apollo proposal is provided, your family and mechanism axis must be materially orthogonal to Apollo unless no meaningful divergence exists
576586
577587
Context: {json.dumps(context, ensure_ascii=False)}
578588
""".strip()
@@ -584,8 +594,8 @@ def build_guard_prompt(
584594
if os.environ.get("INNOVATION_LOOP_LIVE_TEST_MODE") == "1":
585595
return 'Return exactly {"verdict":"approve","validity_risks":[],"smallest_repair":"","single_change_ok":true,"paper_support_ok":true,"redirect_if_underperforming":"切换到正交机制轴并停止重复当前主路线","failure_signature":"若中间指标不变而目标指标波动,则说明当前机制解释站不住。"}.'
586596
context = {
587-
"primary_choice": primary.get("choice"),
588-
"backup_choice": backup.get("choice") if backup else None,
597+
"primary_proposal": primary,
598+
"backup_proposal": backup,
589599
"research_context": prompt_ready_research_context(research_context, "Athena"),
590600
"primary_grounding": primary.get("paper_grounding", []),
591601
"latest_redirect_hint": primary.get("redirect_if_underperforming"),
@@ -644,6 +654,8 @@ def materialize_live_choice(
644654
template["causal_metric_path"] = raw.get("causal_metric_path") or dict(
645655
research_context.get("innovation_briefs", {})
646656
).get(role.lower(), {}).get("falsifiable_prediction")
657+
if isinstance(template["causal_metric_path"], str):
658+
template["causal_metric_path"] = [template["causal_metric_path"]]
647659
template["failure_signature"] = raw.get("failure_signature") or first_guardrail(
648660
research_context
649661
)
@@ -692,6 +704,7 @@ def collect_live_round_proposals(
692704
cooldowns,
693705
result_packet,
694706
research_context,
707+
primary_proposal=exploit_raw,
695708
),
696709
)
697710
guard = run_opencode_agent(
@@ -836,14 +849,38 @@ def tick(config_path: pathlib.Path, workspace: pathlib.Path, mode: str) -> dict:
836849
)
837850
return {"phase": "poll", "poll": polled}
838851
if polled["status"] == "failed":
839-
session["active_run_id"] = active
852+
judged = run_python(
853+
"judge_result.py",
854+
"--config",
855+
str(config_path),
856+
"--workspace",
857+
str(workspace),
858+
"--run-id",
859+
active,
860+
"--monitor-state",
861+
polled["status"],
862+
cwd=workspace,
863+
)
864+
research_context = {
865+
"config_path": str(research_config_path(workspace)),
866+
"config": load_research_config(research_config_path(workspace)),
867+
}
868+
record_research_feedback(workspace, research_context, judged)
869+
session = load_session(session_file)
870+
session["last_failed_task"] = active
871+
session["active_dvc_task"] = None
840872
set_session_stage(session, "crash_recoverable", f"dvc task {active} failed")
841873
save_session(session_file, session)
842874
write_json(
843875
status_file,
844-
{"phase": "failed", "updated_at": now_iso(), "poll": polled},
876+
{
877+
"phase": "failed",
878+
"updated_at": now_iso(),
879+
"poll": polled,
880+
"judge": judged,
881+
},
845882
)
846-
return {"phase": "failed", "poll": polled}
883+
return {"phase": "failed", "poll": polled, "judge": judged}
847884
judged = run_python(
848885
"judge_result.py",
849886
"--config",
@@ -1071,6 +1108,7 @@ def main() -> None:
10711108
"tick",
10721109
"status",
10731110
"resume",
1111+
"branch-from-checkpoint",
10741112
"stop",
10751113
"_run-controller",
10761114
]:
@@ -1138,7 +1176,7 @@ def main() -> None:
11381176
emit_json(status)
11391177
return
11401178

1141-
if args.command == "resume":
1179+
if args.command in {"resume", "branch-from-checkpoint"}:
11421180
session = load_session(session_path(workspace))
11431181
checkpoint = pathlib.Path(
11441182
workspace / "experiments" / "recovery_checkpoint.json"
@@ -1151,22 +1189,37 @@ def main() -> None:
11511189
if not payload or not payload.get("checkpoint_path"):
11521190
emit_json({"resumed": False, "reason": "no_checkpoint"})
11531191
return
1154-
round_selection = select_round_mutation(
1155-
workspace,
1156-
load_goal(config_path),
1157-
int(session.get("iteration_count", 0)) + 1,
1158-
args.mode,
1159-
collect_round_research_context(
1192+
if args.command == "resume":
1193+
failed_task = str(
1194+
session.get("last_failed_task") or payload.get("run_id") or ""
1195+
)
1196+
mutation = (
1197+
load_pending_result(workspace, failed_task) if failed_task else {}
1198+
)
1199+
if not mutation:
1200+
emit_json({"resumed": False, "reason": "no_failed_pending_result"})
1201+
return
1202+
else:
1203+
round_selection = select_round_mutation(
11601204
workspace,
11611205
load_goal(config_path),
11621206
int(session.get("iteration_count", 0)) + 1,
1163-
),
1164-
)
1165-
mutation = round_selection.get("mutation", round_selection)
1207+
args.mode,
1208+
collect_round_research_context(
1209+
workspace,
1210+
load_goal(config_path),
1211+
int(session.get("iteration_count", 0)) + 1,
1212+
),
1213+
)
1214+
mutation = round_selection.get("mutation", round_selection)
11661215
if mutation.get("review_blocked"):
11671216
emit_json({"resumed": False, "reason": "review_blocked"})
11681217
return
1169-
run_id = f"resume-{int(session.get('iteration_count', 0)) + 1:04d}"
1218+
run_id = (
1219+
f"resume-{int(session.get('iteration_count', 0)) + 1:04d}"
1220+
if args.command == "resume"
1221+
else f"branch-{int(session.get('iteration_count', 0)) + 1:04d}"
1222+
)
11701223
candidate = run_python(
11711224
"run_candidate.py",
11721225
"--config",
@@ -1190,6 +1243,7 @@ def main() -> None:
11901243
emit_json(
11911244
{
11921245
"resumed": True,
1246+
"mode": args.command,
11931247
"candidate": candidate,
11941248
"resume_from": payload["checkpoint_path"],
11951249
}

scripts/kb/build_index.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def build_indexes(
5454
*,
5555
scaffold_missing: bool,
5656
extract_claims: bool,
57+
overwrite_claims: bool,
5758
config: Dict[str, Any],
5859
) -> Dict[str, Any]:
5960
output_dir.mkdir(parents=True, exist_ok=True)
@@ -73,7 +74,9 @@ def build_indexes(
7374
if scaffold_missing:
7475
scaffold_figure_note(paths, meta)
7576
missing_fields = validate_meta(meta)
76-
if extract_claims or not paths.claims.exists():
77+
if (extract_claims and (overwrite_claims or not paths.claims.exists())) or (
78+
not extract_claims and not paths.claims.exists()
79+
):
7780
claims = extract_claims_from_markdown(paths, meta)
7881
write_claims(paths.claims, claims)
7982
else:
@@ -125,6 +128,7 @@ def main() -> None:
125128
parser.add_argument("--config")
126129
parser.add_argument("--scaffold-missing", action="store_true")
127130
parser.add_argument("--extract-claims", action="store_true")
131+
parser.add_argument("--overwrite-claims", action="store_true")
128132
args = parser.parse_args()
129133

130134
workspace_root = resolve_workspace_root(args.workspace_root)
@@ -144,6 +148,7 @@ def main() -> None:
144148
output_dir,
145149
scaffold_missing=args.scaffold_missing,
146150
extract_claims=args.extract_claims,
151+
overwrite_claims=args.overwrite_claims,
147152
config=config,
148153
)
149154
)

scripts/kb/daily_tracker_lite.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def main() -> None:
5252
index_output_dir(workspace_root, config),
5353
scaffold_missing=True,
5454
extract_claims=False,
55+
overwrite_claims=False,
5556
config=config,
5657
)
5758
emit_json(

scripts/kb/retrieve_papers.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,16 @@ def main() -> None:
464464
str(goal.get("goal_text") or ""),
465465
str(goal.get("target_metric") or ""),
466466
str(best.get("family") or ""),
467+
" ".join(
468+
str(item.get("failure_signature") or "")
469+
for item in attempts[-5:]
470+
if isinstance(item, dict)
471+
),
472+
" ".join(
473+
str(item.get("reject_reason") or "")
474+
for item in attempts[-5:]
475+
if isinstance(item, dict)
476+
),
467477
" ".join(
468478
str(item.get("family") or "")
469479
for item in attempts[-5:]

scripts/run_candidate.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,12 @@ def main() -> None:
118118
run_id = args.run_id or new_id("candidate")
119119
session = load_session(session_path(workspace))
120120
round_index = int(session.get("iteration_count", 0)) + 1
121-
touched_files = mutation.get("files_to_touch") or [mutation.get("target_file")]
121+
touched_files = (
122+
mutation.get("files_to_touch")
123+
or mutation.get("touched_files")
124+
or [mutation.get("target_file")]
125+
)
126+
touched_files = [path for path in touched_files if path]
122127
save_parent_snapshot(workspace, run_id, touched_files)
123128
execution_result = (
124129
apply_mutation_live(workspace, mutation)
@@ -203,6 +208,11 @@ def main() -> None:
203208
"change_unit": mutation.get("change_unit"),
204209
"proposal_id": mutation.get("proposal_id"),
205210
"family": mutation.get("family"),
211+
"target_file": mutation.get("target_file"),
212+
"files_to_touch": touched_files,
213+
"params": mutation.get("params", {}),
214+
"why_not_parameter_only": mutation.get("why_not_parameter_only"),
215+
"minimal_ablation": mutation.get("minimal_ablation"),
206216
"touched_files": touched_files,
207217
"diff_summary": execution_result.get(
208218
"diff_summary", mutation.get("change_unit")

src/agents/apollo.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Input contract:
1616
1717
Output contract:
1818
- Return strict JSON only.
19-
- Return one primary proposal and one backup proposal.
19+
- Return one proposal object only.
2020
- Each proposal must include: title, family, mechanism, files_to_touch, expected_gain, risk, why_not_parameter_only, minimal_ablation, paper_grounding, redirect_if_underperforming, causal_metric_path, failure_signature, and pivot_after_failure.
2121
- Use \`causal_metric_path\` to name the intermediate metric path that should improve before the final target metric.
2222
- Use \`failure_signature\` to name the observable pattern that would tell the outer loop this route is failing.

src/agents/hermes.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Input contract:
1616
1717
Output contract:
1818
- Return strict JSON only.
19-
- Return one divergent proposal and one backup.
19+
- Return one divergent proposal object only.
2020
- Each proposal must include: title, family, mechanism, files_to_touch, expected_gain, risk, why_not_parameter_only, minimal_ablation, paper_grounding, redirect_if_underperforming, causal_metric_path, failure_signature, and pivot_after_failure.
2121
- Use \`causal_metric_path\` to name the intermediate metric path that should improve before the final target metric.
2222
- Use \`failure_signature\` to name the observable pattern that would tell the outer loop this route is failing.

0 commit comments

Comments
 (0)