22import json
33import os
44import time
5+ from collections .abc import AsyncGenerator , Sequence
56from contextlib import asynccontextmanager
6- from typing import Any , AsyncGenerator , Literal , Sequence
7+ from typing import Any , Literal
78
89import blobfile as bf
9- import numpy as np
1010import structlog .stdlib
1111from dotenv import load_dotenv
1212
4848 get_timestamp ,
4949 is_docker_running ,
5050 purple ,
51+ safe_mean ,
5152)
5253
54+ MIN_UPLOAD_INTERVAL_MESSAGES = 5
55+ MIN_UPLOAD_INTERVAL_SECONDS = 1800
56+
57+
5358GRADER_OPENAI_API_KEY = os .getenv ("GRADER_OPENAI_API_KEY" ) or os .getenv ("OPENAI_API_KEY" )
5459
5560
@@ -64,7 +69,7 @@ class ExternalPythonCodingSolver(PythonCodingSolver):
6469 default = False , doc = "Whether to make the local NVIDIA GPU available to the agent"
6570 )
6671
67- upload_interval_messages : int = chz .field (
72+ upload_interval_messages : int | None = chz .field (
6873 default = None ,
6974 doc = "Upload interval in agent steps for heavy logs" ,
7075 )
@@ -123,16 +128,23 @@ async def _start_computer(self, task: PBTask) -> AsyncGenerator[ComputerInterfac
123128 destinations = ["run" ],
124129 )
125130
126- if self .upload_interval_messages and self .upload_interval_messages <= 5 :
131+ if (
132+ self .upload_interval_messages
133+ and self .upload_interval_messages <= MIN_UPLOAD_INTERVAL_MESSAGES
134+ ):
127135 ctx_logger .warning (
128136 "Uploading artifacts every five messages or less is untested. "
129137 "Consider setting `upload_interval_messages` to a higher value." ,
130138 destinations = ["run" ],
131139 )
132140
133- if self .upload_interval_seconds and self .upload_interval_seconds < 1800 :
141+ if (
142+ self .upload_interval_seconds
143+ and self .upload_interval_seconds < MIN_UPLOAD_INTERVAL_SECONDS
144+ ):
134145 ctx_logger .warning (
135- "Uploading artifacts more frequently than every 1800 seconds is untested. "
146+ "Uploading artifacts more frequently than every"
147+ f" { MIN_UPLOAD_INTERVAL_SECONDS } seconds is untested. "
136148 "Consider setting `upload_interval_seconds` to a higher value." ,
137149 destinations = ["run" ],
138150 )
@@ -257,7 +269,7 @@ async def _run_agent(self, computer: ComputerInterface, task: PBTask) -> AgentOu
257269 )
258270
259271 ctx_logger .info (f"status: { status } " , destinations = ["run" ])
260- start_time = status .get ("created_at" ) if status . get ( "created_at" ) else time .time ()
272+ start_time = status .get ("created_at" , time .time () )
261273 if status .get ("agent_finished_at" ):
262274 end_time = status .get ("agent_finished_at" )
263275 elif status .get ("last_updated" ):
@@ -572,7 +584,7 @@ async def get_full_summary(
572584 }
573585
574586 other_stats = {
575- "repro_mean_time" : np . mean (
587+ "repro_mean_time" : safe_mean (
576588 [
577589 r .reproduction_output .metadata .repro_execution_time # type: ignore
578590 for r in results_clean
@@ -636,16 +648,7 @@ async def get_existing_run_ids(self, run_group_id: str) -> set[str]:
636648 return run_ids
637649
638650 def uses_local_config (self ) -> bool :
639- """
640- Check if any of paperbench.solver.cluster_config, paperbench.reproduction.cluster_config,
641- or paperbench.judge.cluster_config is an instance of LocalConfig.
642-
643- Args:
644- paperbench: A PaperBench PythonCodingEval instance
645-
646- Returns:
647- bool: True if any of the cluster configs is a LocalConfig, False otherwise
648- """
651+ """Return True if any cluster config uses LocalConfig."""
649652
650653 # PythonCodingSolver may not have a cluster_config, just ExternalPythonCodingSolver does for now
651654 if hasattr (self .solver , "cluster_config" ):
0 commit comments