Skip to content

Commit fa27b30

Browse files
feat: add pre-built image auto-detection to commit0 and gaia benchmarks
Add local_image_exists() to image_utils and use it in commit0/gaia docker workspace paths. If the expected agent-server image already exists locally, use DockerWorkspace directly; otherwise fall back to building on-the-fly with DockerDevWorkspace as before. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d3f24e4 commit fa27b30

File tree

3 files changed

+49
-18
lines changed

3 files changed

+49
-18
lines changed

benchmarks/commit0/run_infer.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
construct_eval_output_dir,
2525
get_default_on_result_writer,
2626
)
27-
from benchmarks.utils.image_utils import image_exists
27+
from benchmarks.utils.image_utils import image_exists, local_image_exists
2828
from benchmarks.utils.llm_config import load_llm_config
2929
from benchmarks.utils.models import (
3030
EvalInstance,
@@ -36,7 +36,7 @@
3636
from openhands.sdk.workspace import RemoteWorkspace
3737
from openhands.tools.delegate import DelegateTool
3838
from openhands.tools.preset.default import get_default_tools
39-
from openhands.workspace import APIRemoteWorkspace, DockerDevWorkspace
39+
from openhands.workspace import APIRemoteWorkspace, DockerDevWorkspace, DockerWorkspace
4040

4141

4242
logger = get_logger(__name__)
@@ -188,16 +188,26 @@ def prepare_workspace(
188188
logger.info(f"Using base docker image: {base_docker_image}")
189189

190190
if self.metadata.workspace_type == "docker":
191-
# Build agent-server image from base commit0 image
192-
workspace = DockerDevWorkspace(
193-
base_image=base_docker_image,
194-
working_dir="/workspace",
195-
target=build_target,
196-
forward_env=forward_env or [],
197-
)
198-
logger.info(
199-
f"Building workspace from {base_docker_image}. This may take a while..."
191+
custom_tag = extract_custom_tag(base_docker_image)
192+
suffix = f"-{build_target}" if build_target != "binary" else ""
193+
agent_server_image = (
194+
f"{EVAL_AGENT_SERVER_IMAGE}:{SDK_SHORT_SHA}-{custom_tag}{suffix}"
200195
)
196+
if local_image_exists(agent_server_image):
197+
logger.info(f"Using pre-built image {agent_server_image}")
198+
workspace = DockerWorkspace(
199+
server_image=agent_server_image,
200+
working_dir="/workspace",
201+
forward_env=forward_env or [],
202+
)
203+
else:
204+
logger.info(f"Building workspace from {base_docker_image}...")
205+
workspace = DockerDevWorkspace(
206+
base_image=base_docker_image,
207+
working_dir="/workspace",
208+
target=build_target,
209+
forward_env=forward_env or [],
210+
)
201211
elif self.metadata.workspace_type == "remote":
202212
runtime_api_key = os.getenv("RUNTIME_API_KEY")
203213
if not runtime_api_key:

benchmarks/gaia/run_infer.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
get_default_on_result_writer,
2828
)
2929
from benchmarks.utils.fake_user_response import run_conversation_with_fake_user_response
30-
from benchmarks.utils.image_utils import image_exists
30+
from benchmarks.utils.image_utils import image_exists, local_image_exists
3131
from benchmarks.utils.llm_config import load_llm_config
3232
from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput
3333
from benchmarks.utils.version import SDK_SHORT_SHA
@@ -47,7 +47,7 @@
4747
from openhands.sdk.workspace import RemoteWorkspace
4848
from openhands.tools.delegate import DelegateTool
4949
from openhands.tools.preset.default import get_default_tools
50-
from openhands.workspace import APIRemoteWorkspace, DockerDevWorkspace
50+
from openhands.workspace import APIRemoteWorkspace, DockerDevWorkspace, DockerWorkspace
5151

5252

5353
logger = get_logger(__name__)
@@ -156,12 +156,23 @@ def prepare_workspace(
156156
logger.info(f"Preparing workspace for instance {instance.id}")
157157

158158
if self.metadata.workspace_type == "docker":
159-
# Use DockerDevWorkspace with base image (same as main branch)
160-
workspace = DockerDevWorkspace(
161-
base_image="nikolaik/python-nodejs:python3.12-nodejs22",
162-
working_dir="/workspace",
163-
forward_env=forward_env or [],
159+
agent_server_image = (
160+
f"{EVAL_AGENT_SERVER_IMAGE}:{SDK_SHORT_SHA}-gaia-binary"
164161
)
162+
if local_image_exists(agent_server_image):
163+
logger.info(f"Using pre-built image {agent_server_image}")
164+
workspace = DockerWorkspace(
165+
server_image=agent_server_image,
166+
working_dir="/workspace",
167+
forward_env=forward_env or [],
168+
)
169+
else:
170+
logger.info("Building workspace from nikolaik/python-nodejs:python3.12-nodejs22...")
171+
workspace = DockerDevWorkspace(
172+
base_image="nikolaik/python-nodejs:python3.12-nodejs22",
173+
working_dir="/workspace",
174+
forward_env=forward_env or [],
175+
)
165176
elif self.metadata.workspace_type == "remote":
166177
# For workflow, use APIRemoteWorkspace with pre-built GAIA image
167178
# GAIA uses a universal agent server image (one image for all instances)

benchmarks/utils/image_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env python3
22
import base64
3+
import subprocess
34
import sys
45

56
import requests
@@ -54,6 +55,15 @@ def _ghcr_token(repo: str, username: str | None, pat: str | None) -> str | None:
5455
return None
5556

5657

58+
def local_image_exists(image: str) -> bool:
    """Check if a Docker image exists in the local Docker daemon.

    Runs ``docker image inspect <image>`` and reports whether the command
    exited with status 0.

    Args:
        image: Image reference (for example ``repo/name:tag``) to look up.

    Returns:
        True if the inspect command succeeded; False if it exited non-zero
        or the ``docker`` executable could not be launched at all.
    """
    try:
        result = subprocess.run(
            ["docker", "image", "inspect", image],
            # Only the exit status matters; discard the inspect JSON and any
            # error text instead of buffering them in memory.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # Docker CLI missing or not executable (FileNotFoundError,
        # PermissionError, ...): treat the image as absent rather than
        # crashing an existence check.
        return False
    return result.returncode == 0
65+
66+
5767
def image_exists(
5868
image_ref: str,
5969
gh_username: str | None = None,

0 commit comments

Comments
 (0)