diff --git a/AGENTS.md b/AGENTS.md index 3b41d9d..659f601 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -495,6 +495,11 @@ class MetaCognition: Notes: Episode loader and DSL cast dy/dx/fill and mapping entries to int +[X] Step 4.3 UPDATE4 - Submission script handles memory errors with fallback + Date: 2025-09-13 + Test Result: pytest tests/test_solve_with_budget_memory.py -q + Notes: solve_with_budget catches MemoryError, reports memerror count, runs gc per task + ``` diff --git a/arc_submit.py b/arc_submit.py index 4ab2760..9f99e30 100644 --- a/arc_submit.py +++ b/arc_submit.py @@ -8,6 +8,7 @@ from __future__ import annotations +import gc import json import os import random @@ -72,7 +73,7 @@ def solve_with_budget(task: Dict[str, Any], solver: ARCSolver) -> Tuple[List[Dic dictionaries of the form ``{"output": grid}`` and ``metadata`` contains diagnostic information such as elapsed time and timeout flag. """ - + # [S:ALG v1] fallback=best_so_far memlimit=soft pass _set_mem_limit() signal.signal(signal.SIGALRM, _alarm) signal.alarm(int(HARD_TIMEOUT_SEC)) @@ -80,11 +81,24 @@ def solve_with_budget(task: Dict[str, Any], solver: ARCSolver) -> Tuple[List[Dic try: attempt1, attempt2 = solver.solve_task_two_attempts(task) elapsed = time.time() - start - return [{"output": attempt1}, {"output": attempt2}], {"elapsed": elapsed, "timeout": False} + return [ + {"output": attempt1}, + {"output": attempt2}, + ], {"elapsed": elapsed, "timeout": False, "memerror": False} except Timeout: best = solver.best_so_far(task) elapsed = time.time() - start - return [{"output": best}, {"output": best}], {"elapsed": elapsed, "timeout": True} + return [ + {"output": best}, + {"output": best}, + ], {"elapsed": elapsed, "timeout": True, "memerror": False} + except MemoryError: + best = solver.best_so_far(task) + elapsed = time.time() - start + return [ + {"output": best}, + {"output": best}, + ], {"elapsed": elapsed, "timeout": False, "memerror": True} finally: signal.alarm(0) @@ -95,19 +109,23 @@ def main() -> None: solver = ARCSolver(use_enhancements=True) solutions: Dict[str, Dict[str, List[List[int]]]] = {} + mem_error_count = 0 for task_id, task in data.items(): attempts, meta = solve_with_budget(task, solver) solutions[task_id] = { "attempt_1": attempts[0]["output"], "attempt_2": attempts[1]["output"], } + if meta.get("memerror"): + mem_error_count += 1 print( - f"[task {task_id}] t={meta['elapsed']:.2f}s timeout={meta['timeout']}", + f"[task {task_id}] t={meta['elapsed']:.2f}s timeout={meta['timeout']} memerror={meta['memerror']}", file=sys.stderr, ) + gc.collect() path = save_submission(solutions, "submission.json") - print(f"Saved {path} with {len(solutions)} tasks.") + print(f"Saved {path} with {len(solutions)} tasks. memory_errors={mem_error_count}") if __name__ == "__main__": diff --git a/tests/test_solve_with_budget_memory.py b/tests/test_solve_with_budget_memory.py new file mode 100644 index 0000000..0477fa1 --- /dev/null +++ b/tests/test_solve_with_budget_memory.py @@ -0,0 +1,20 @@ +import sys +from pathlib import Path + +import pytest + +sys.path.append(str(Path(__file__).resolve().parents[1])) +from arc_submit import solve_with_budget + +class DummySolver: + def solve_task_two_attempts(self, task): + raise MemoryError("boom") + + def best_so_far(self, task): + return [[0]] + +def test_memory_error_fallback(): + attempts, meta = solve_with_budget({}, DummySolver()) + assert attempts[0]["output"] == [[0]] + assert meta["memerror"] is True + assert meta["timeout"] is False