Skip to content

Commit 7807fef

Browse files
fix: pin SDK to proven-working SHA and fix build regression (#504)
Three-part fix for the 500-image build regression introduced in #456:

1. Pin SDK submodule to 30819566 (proven in 34-min 500-image build)
   - The regression between bde715c1 and this SHA has been fixed upstream
   - Images with this SHA already exist in the GHCR registry
   - Restores fast warm-cache builds (~34 minutes vs 5+ hours)

2. Replace openhands.sdk.get_logger with stdlib logging in build modules
   - build_utils.py, buildx_utils.py, image_utils.py, build_images.py
   - Prevents Rich console state from being inherited across ProcessPoolExecutor forks (deadlock fix)

3. Add cold-cache survivability improvements to CI workflows
   - timeout-minutes: 180 on both swebench and swtbench build jobs
   - Post-build disk/timing instrumentation for observability
   - Preflight BuildKit prune + disk check for swtbench (was missing)
   - BUILDKIT_RESET_ON_FAILURE for swtbench build step

Fixes #504
Refs #502, #503

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e6f6da4 commit 7807fef

File tree

9 files changed

+97
-15
lines changed

9 files changed

+97
-15
lines changed

.github/workflows/build-swebench-images.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ jobs:
7878
7979
runs-on:
8080
labels: blacksmith-32vcpu-ubuntu-2204
81+
timeout-minutes: 180
8182

8283
# Allow pushing to GHCR and commenting on issues
8384
permissions:
@@ -86,6 +87,11 @@ jobs:
8687
issues: write
8788

8889
steps:
90+
- name: Record build start time
91+
run: |
92+
echo "BUILD_START=$(date +%s)" >> "$GITHUB_ENV"
93+
echo "Build started at $(date -u)"
94+
8995
- name: Determine checkout ref
9096
id: checkout-ref
9197
run: |
@@ -249,6 +255,21 @@ jobs:
249255
BUILDKIT_PROGRESS: plain
250256
BUILDKIT_RESET_ON_FAILURE: 1
251257

258+
- name: Post-build disk and timing report
259+
if: always()
260+
run: |
261+
set -euo pipefail
262+
BUILD_END=$(date +%s)
263+
ELAPSED=$(( BUILD_END - ${BUILD_START:-$BUILD_END} ))
264+
echo "## Build Timing" >> "$GITHUB_STEP_SUMMARY"
265+
echo "**Elapsed:** $((ELAPSED / 60))m $((ELAPSED % 60))s" >> "$GITHUB_STEP_SUMMARY"
266+
echo "" >> "$GITHUB_STEP_SUMMARY"
267+
268+
echo "## Disk Usage After Build" >> "$GITHUB_STEP_SUMMARY"
269+
df -h / /var/lib/buildkit 2>/dev/null | tee -a "$GITHUB_STEP_SUMMARY" || true
270+
echo "" >> "$GITHUB_STEP_SUMMARY"
271+
docker buildx du --verbose 2>/dev/null | head -40 | tee -a "$GITHUB_STEP_SUMMARY" || true
272+
252273
- name: Archive build logs
253274
if: always()
254275
run: |

.github/workflows/build-swtbench-images.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ jobs:
7272
7373
runs-on:
7474
labels: blacksmith-32vcpu-ubuntu-2204
75+
timeout-minutes: 180
7576

7677
permissions:
7778
contents: read
@@ -88,6 +89,11 @@ jobs:
8889
SELECT_FILE: ''
8990

9091
steps:
92+
- name: Record build start time
93+
run: |
94+
echo "BUILD_START=$(date +%s)" >> "$GITHUB_ENV"
95+
echo "Build started at $(date -u)"
96+
9197
- name: Determine checkout ref
9298
id: checkout-ref
9399
run: |
@@ -134,6 +140,35 @@ jobs:
134140
run: |
135141
make build
136142
143+
- name: "Preflight: prune cache and verify BuildKit disk"
144+
run: |
145+
set -euo pipefail
146+
KEEP_GB=60
147+
echo "Pruning BuildKit cache (target max-storage ${KEEP_GB} GiB, no filters)..."
148+
if ! docker buildx prune --all --force --max-storage ${KEEP_GB}g; then
149+
docker buildx prune --all --force --keep-storage ${KEEP_GB}g || true
150+
fi
151+
152+
if df -B1 /var/lib/buildkit > /tmp/buildkit_df 2>/dev/null; then
153+
LINE=$(tail -n1 /tmp/buildkit_df)
154+
TOTAL=$(echo "$LINE" | awk '{print $2}')
155+
USED=$(echo "$LINE" | awk '{print $3}')
156+
FREE=$(echo "$LINE" | awk '{print $4}')
157+
if [ -n "$TOTAL" ] && [ -n "$FREE" ]; then
158+
PCT=$(( 100 * USED / TOTAL ))
159+
echo "BuildKit disk: used ${USED} / ${TOTAL} bytes (${PCT}%); free ${FREE} bytes"
160+
MIN=$((75 * 1024 * 1024 * 1024))
161+
if [ "$FREE" -lt "$MIN" ]; then
162+
echo "::error::Not enough free space on /var/lib/buildkit (${FREE} bytes free, need >= ${MIN})"
163+
exit 1
164+
fi
165+
else
166+
echo "Warning: unable to parse df output for /var/lib/buildkit"
167+
fi
168+
else
169+
echo "Warning: /var/lib/buildkit not found; skipping disk check"
170+
fi
171+
137172
- name: Build and push SWT-Bench images
138173
run: |
139174
set -euo pipefail
@@ -181,6 +216,7 @@ jobs:
181216
env:
182217
DOCKER_BUILDKIT: 1
183218
BUILDKIT_PROGRESS: plain
219+
BUILDKIT_RESET_ON_FAILURE: 1
184220

185221
- name: Build prebaked eval env images
186222
if: ${{ inputs.build-eval-env == 'true' }}
@@ -240,6 +276,21 @@ jobs:
240276
docker ps -a || true
241277
docker system df || true
242278
279+
- name: Post-build disk and timing report
280+
if: always()
281+
run: |
282+
set -euo pipefail
283+
BUILD_END=$(date +%s)
284+
ELAPSED=$(( BUILD_END - ${BUILD_START:-$BUILD_END} ))
285+
echo "## Build Timing" >> "$GITHUB_STEP_SUMMARY"
286+
echo "**Elapsed:** $((ELAPSED / 60))m $((ELAPSED % 60))s" >> "$GITHUB_STEP_SUMMARY"
287+
echo "" >> "$GITHUB_STEP_SUMMARY"
288+
289+
echo "## Disk Usage After Build" >> "$GITHUB_STEP_SUMMARY"
290+
df -h / /var/lib/buildkit 2>/dev/null | tee -a "$GITHUB_STEP_SUMMARY" || true
291+
echo "" >> "$GITHUB_STEP_SUMMARY"
292+
docker buildx du --verbose 2>/dev/null | head -40 | tee -a "$GITHUB_STEP_SUMMARY" || true
293+
243294
- name: Archive build logs
244295
if: always()
245296
run: |

benchmarks/swebench/build_images.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@
2323
)
2424
from benchmarks.utils.dataset import get_dataset
2525
from benchmarks.utils.image_utils import remote_image_exists
26-
from openhands.sdk import get_logger
2726

27+
# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
28+
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
29+
import logging
2830

29-
logger = get_logger(__name__)
31+
logger = logging.getLogger(__name__)
3032
WRAPPER_DOCKERFILE = Path(__file__).with_name("Dockerfile.swebench-deps")
3133

3234

benchmarks/swtbench/build_eval_env_images.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
from benchmarks.swtbench.image_utils import ensure_swt_bench_repo
1414
from benchmarks.utils.dataset import get_dataset
1515
from benchmarks.utils.image_utils import remote_image_exists
16-
from openhands.sdk import get_logger
1716

17+
# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
18+
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
19+
import logging
1820

19-
logger = get_logger(__name__)
21+
logger = logging.getLogger(__name__)
2022

2123

2224
def select_instance_ids(

benchmarks/swtbench/image_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
from typing import Iterable
1010

1111
from benchmarks.swtbench.config import EVAL_DEFAULTS
12-
from openhands.sdk import get_logger
1312

13+
# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
14+
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
15+
import logging
1416

15-
logger = get_logger(__name__)
17+
logger = logging.getLogger(__name__)
1618

1719

1820
def ensure_swt_bench_repo(cache_dir: Path | None = None) -> Path:

benchmarks/utils/build_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,12 @@
2929
)
3030
from benchmarks.utils.constants import EVAL_AGENT_SERVER_IMAGE
3131
from benchmarks.utils.image_utils import local_image_exists, remote_image_exists
32-
from openhands.sdk import get_logger
3332

33+
# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
34+
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
35+
import logging
3436

35-
logger = get_logger(__name__)
37+
logger = logging.getLogger(__name__)
3638

3739

3840
class BuildOutput(BaseModel):

benchmarks/utils/buildx_utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
import time
1212
from pathlib import Path
1313

14-
from openhands.sdk import get_logger
14+
# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
15+
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
16+
import logging
1517

16-
17-
logger = get_logger(__name__)
18+
logger = logging.getLogger(__name__)
1819

1920

2021
def _read_reset_state(path: Path) -> dict[str, float]:

benchmarks/utils/image_utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414

1515
import requests
1616

17-
from openhands.sdk import get_logger
17+
# Use stdlib logging instead of openhands.sdk.get_logger to avoid initializing
18+
# Rich console state before ProcessPoolExecutor forks (causes deadlocks).
19+
import logging
1820

19-
20-
logger = get_logger(__name__)
21+
logger = logging.getLogger(__name__)
2122

2223

2324
ACCEPT = ",".join(

vendor/software-agent-sdk

Submodule software-agent-sdk updated 141 files

0 commit comments

Comments
 (0)