Merge pull request #15 from tylerbessire/codex/fix-memoryerror-during-submission-process

tylerbessire · web-flow · commit 02c2d34ca4c5 · 2025-09-13T00:01:14.000-07:00
Handle MemoryError in submission pipeline
diff --git a/AGENTS.md b/AGENTS.md
@@ -495,6 +495,11 @@ class MetaCognition:
     Notes: Episode loader and DSL cast dy/dx/fill and mapping entries to int
 
 
+[X] Step 4.3 UPDATE4 - Submission script handles memory errors with fallback
+    Date: 2025-09-13
+    Test Result: pytest tests/test_solve_with_budget_memory.py -q
+    Notes: solve_with_budget catches MemoryError, falls back to best_so_far, and sets meta["memerror"]; main() counts memory errors and calls gc.collect() after each task
+
 
 ```
 
diff --git a/arc_submit.py b/arc_submit.py
@@ -8,6 +8,7 @@
 
 from __future__ import annotations
 
+import gc
 import json
 import os
 import random
@@ -72,19 +73,32 @@ def solve_with_budget(task: Dict[str, Any], solver: ARCSolver) -> Tuple[List[Dic
         dictionaries of the form ``{"output": grid}`` and ``metadata``
         contains diagnostic information such as elapsed time and timeout flag.
     """
-
+    # [S:ALG v1] fallback=best_so_far memlimit=soft pass
     _set_mem_limit()
     signal.signal(signal.SIGALRM, _alarm)
     signal.alarm(int(HARD_TIMEOUT_SEC))
     start = time.time()
     try:
         attempt1, attempt2 = solver.solve_task_two_attempts(task)
         elapsed = time.time() - start
-        return [{"output": attempt1}, {"output": attempt2}], {"elapsed": elapsed, "timeout": False}
+        return [
+            {"output": attempt1},
+            {"output": attempt2},
+        ], {"elapsed": elapsed, "timeout": False, "memerror": False}
     except Timeout:
         best = solver.best_so_far(task)
         elapsed = time.time() - start
-        return [{"output": best}, {"output": best}], {"elapsed": elapsed, "timeout": True}
+        return [
+            {"output": best},
+            {"output": best},
+        ], {"elapsed": elapsed, "timeout": True, "memerror": False}
+    except MemoryError:
+        best = solver.best_so_far(task)
+        elapsed = time.time() - start
+        return [
+            {"output": best},
+            {"output": best},
+        ], {"elapsed": elapsed, "timeout": False, "memerror": True}
     finally:
         signal.alarm(0)
 
@@ -95,19 +109,23 @@ def main() -> None:
     solver = ARCSolver(use_enhancements=True)
     solutions: Dict[str, Dict[str, List[List[int]]]] = {}
 
+    mem_error_count = 0
     for task_id, task in data.items():
         attempts, meta = solve_with_budget(task, solver)
         solutions[task_id] = {
             "attempt_1": attempts[0]["output"],
             "attempt_2": attempts[1]["output"],
         }
+        if meta.get("memerror"):
+            mem_error_count += 1
         print(
-            f"[task {task_id}] t={meta['elapsed']:.2f}s timeout={meta['timeout']}",
+            f"[task {task_id}] t={meta['elapsed']:.2f}s timeout={meta['timeout']} memerror={meta['memerror']}",
             file=sys.stderr,
         )
+        gc.collect()
 
     path = save_submission(solutions, "submission.json")
-    print(f"Saved {path} with {len(solutions)} tasks.")
+    print(f"Saved {path} with {len(solutions)} tasks. memory_errors={mem_error_count}")
 
 
 if __name__ == "__main__":
diff --git a/tests/test_solve_with_budget_memory.py b/tests/test_solve_with_budget_memory.py
@@ -0,0 +1,20 @@
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.append(str(Path(__file__).resolve().parents[1]))
+from arc_submit import solve_with_budget
+
+class DummySolver:
+    def solve_task_two_attempts(self, task):
+        raise MemoryError("boom")
+
+    def best_so_far(self, task):
+        return [[0]]
+
+def test_memory_error_fallback():
+    attempts, meta = solve_with_budget({}, DummySolver())
+    assert attempts[0]["output"] == [[0]]
+    assert meta["memerror"] is True
+    assert meta["timeout"] is False