#!/usr/bin/env bash
# [S:ALG v1] runner=chunked_public_eval pass
set -euo pipefail

# Tunables — every one can be overridden from the caller's environment.
: "${ROOT:=$(pwd)}"
: "${PY:=python3}"
: "${BATCH:=50}"                                  # tasks per chunk (tune if memory is tight)
: "${OUT:=submission/full_submission.json}"
LOGDIR="$ROOT/runlogs"
mkdir -p "$LOGDIR" "$(dirname "$OUT")"

# Memory-friendly defaults: unbuffered stdout, glibc malloc tuning, and
# single-threaded BLAS/OpenMP backends so chunks don't oversubscribe cores.
export PYTHONUNBUFFERED=1 PYTHONMALLOC=malloc MALLOC_ARENA_MAX=2
export OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 NUMEXPR_NUM_THREADS=1
# 1) Ensure sitecustomize.py exists (float32 RNG default + malloc trim at exit).
#    Everything in it is best-effort: sitecustomize runs at every interpreter
#    start-up, so it must never raise.
if [[ ! -f "$ROOT/sitecustomize.py" ]]; then
  cat > "$ROOT/sitecustomize.py" <<'PY'
"""Best-effort process tuning loaded automatically by the `site` module."""
import atexit
import ctypes
import os

# Keep BLAS/OpenMP single-threaded unless the caller already chose otherwise.
for _var in ("OPENBLAS_NUM_THREADS", "OMP_NUM_THREADS",
             "MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS"):
    os.environ.setdefault(_var, "1")

try:
    import numpy as np

    _orig = np.random.Generator.standard_normal

    def _stdnorm(self, size=None, dtype=np.float32, out=None):
        # Identical to the stock method, but defaulting to float32 to halve memory.
        return _orig(self, size=size, dtype=dtype, out=out)

    # NOTE(review): Generator is a C-extension type; attribute assignment can
    # raise TypeError on some numpy builds, and numpy itself may be absent.
    # Guarding keeps interpreter start-up working either way.
    np.random.Generator.standard_normal = _stdnorm
except Exception:
    pass

try:
    # Return freed heap pages to the OS when the process exits (glibc only).
    libc = ctypes.CDLL("libc.so.6")
    atexit.register(lambda: libc.malloc_trim(0))
except Exception:
    pass
PY
fi
| 35 | + |
| 36 | +# 2) Chunked submission using a pure-Python runner (no --only flag required) |
| 37 | +"$PY" - "$BATCH" "$OUT" <<'PY' |
"""Chunked ARC evaluation runner (fed to the interpreter on stdin).

argv: BATCH (tasks per chunk) and OUT (submission JSON path). Loads
data/arc-agi_evaluation_challenges.json, solves every task in chunks with
arc_solver.solver.solve_task, and writes a JSON list of
{"task_id": ..., "outputs": [grid, ...]} records to OUT.
"""
import json
import os
import sys
import time
from pathlib import Path


def load_json(path):
    """Read and parse one JSON file."""
    with open(path, "r") as f:
        return json.load(f)


def index_tasks(eval_ch):
    """Build {task_id: task_obj} from either a list or a dict payload."""
    tasks = {}
    if isinstance(eval_ch, list):
        for item in eval_ch:
            tid = item.get("task_id") or item.get("id")
            if tid is not None:
                tasks[str(tid)] = item
    elif isinstance(eval_ch, dict):
        for key, val in eval_ch.items():
            tasks[str(key)] = val
    return tasks


def normalize_outputs(pred):
    """Wrap a bare 2D grid (rows of numbers) into a one-element list of grids.

    solve_task may return either a single grid or a list of per-test grids;
    the submission format always wants the latter.
    """
    if pred and isinstance(pred[0], (list, tuple)):
        # BUGFIX: the original outer condition also required pred[0][0] to be
        # a list, which contradicted the rows-of-numbers check below and made
        # the wrap branch unreachable for a bare grid.
        if all(isinstance(row, (list, tuple)) and row
               and isinstance(row[0], (int, float)) for row in pred):
            return [pred]
    return pred


def main():
    batch = int(sys.argv[1])
    out_path = sys.argv[2]
    root = Path(os.getcwd())
    sys.path.append(str(root))
    from arc_solver.solver import solve_task  # repo API; import after sys.path tweak

    tasks = index_tasks(load_json(root / "data/arc-agi_evaluation_challenges.json"))
    ids = sorted(tasks)
    chunks = [ids[i:i + batch] for i in range(0, len(ids), batch)]
    all_preds = []
    start = time.time()

    for ci, chunk in enumerate(chunks, 1):
        t0 = time.time()
        ok = 0
        for tid in chunk:
            try:
                pred = normalize_outputs(solve_task(tasks[tid]))
                all_preds.append({"task_id": tid, "outputs": pred})
                ok += 1
            except Exception:
                # Record an empty prediction on error to keep submission shape stable.
                all_preds.append({"task_id": tid, "outputs": []})
        dt = time.time() - t0
        print(f"[chunk {ci}/{len(chunks)}] solved {ok}/{len(chunk)} in {dt:.1f}s", flush=True)

    # Write final submission (guard: OUT may be a bare filename with no dirname).
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w") as f:
        json.dump(all_preds, f)
    print(f"Wrote {out_path} with {len(all_preds)} items in {time.time() - start:.1f}s", flush=True)


if __name__ == "__main__":
    main()
PY

# 3) Score against public eval solutions (if present); the scorer follows on stdin.
if [[ -f data/arc-agi_evaluation_solutions.json ]]; then
  "$PY" - <<'PY'
"""Score submission/full_submission.json against the public eval solutions."""
import json


def norm(grids):
    """Wrap a bare 2D grid (rows of numbers) into a one-element list of grids."""
    if grids and isinstance(grids[0], (list, tuple)):
        # BUGFIX: the original also required grids[0][0] to be a list in the
        # outer condition, contradicting the rows-of-numbers check below, so a
        # bare grid was never actually wrapped.
        if all(isinstance(r, (list, tuple)) and r
               and isinstance(r[0], (int, float)) for r in grids):
            return [grids]
    return grids


def collect_predictions(sub):
    """Build {task_id: list_of_grids} from the submission payload (list form)."""
    pred = {}
    if isinstance(sub, list):
        for it in sub:
            tid = it.get("task_id") or it.get("id")
            out = it.get("outputs") or it.get("output")
            # BUGFIX: check the raw id for None *before* str() — the original
            # stringified first, so a missing id became the key "None".
            if tid is not None and out is not None:
                pred[str(tid)] = norm(out)
    return pred


def collect_ground_truth(sol):
    """Build {task_id: list_of_grids} from the solutions payload (list or dict)."""
    gt = {}
    if isinstance(sol, list):
        for it in sol:
            tid = it.get("task_id") or it.get("id")
            out = it.get("solutions") or it.get("outputs") or it.get("solution")
            if tid is not None and out is not None:
                gt[str(tid)] = norm(out)
    elif isinstance(sol, dict):
        for k, v in sol.items():
            gt[str(k)] = norm(v)
    return gt


def score(pred, gt):
    """Return (exact_matches, comparable_tasks) over the shared task ids."""
    ids = sorted(set(pred) & set(gt))
    ok = sum(
        1 for tid in ids
        if len(pred[tid]) == len(gt[tid])
        and all(p == g for p, g in zip(pred[tid], gt[tid]))
    )
    return ok, len(ids)


def main():
    sub = json.load(open("submission/full_submission.json"))
    sol = json.load(open("data/arc-agi_evaluation_solutions.json"))
    ok, total = score(collect_predictions(sub), collect_ground_truth(sol))
    pct = (ok / total * 100.0) if total else 0.0
    print(f"EVAL SCORE (public): {ok}/{total} = {pct:.2f}%")


if __name__ == "__main__":
    main()
PY
else
  echo "Note: public solutions not found at data/arc-agi_evaluation_solutions.json; skipping score."
fi

echo "Full submission at: $OUT"