11"""
22Evaluator for circle packing example (n=26) with improved timeout handling
33"""
4+
45import importlib .util
56import numpy as np
67import time
1213import sys
1314import pickle
1415
16+
class TimeoutError(Exception):
    """Raised when an evaluated program exceeds its allotted execution time.

    NOTE: this deliberately carries the same name as the built-in
    ``TimeoutError``; inside this module the name refers to this plain
    ``Exception`` subclass, not to the built-in ``OSError`` subclass.
    """
1719
20+
def timeout_handler(signum, frame):
    """Abort the current call by raising TimeoutError.

    The (signum, frame) signature is the one expected of a ``signal``
    handler; neither argument is read.

    Args:
        signum: Signal number supplied by the caller (unused)
        frame: Stack frame supplied by the caller (unused)

    Raises:
        TimeoutError: Always, with a fixed "timed out" message
    """
    message = "Function execution timed out"
    raise TimeoutError(message)
2124
25+
def validate_packing(centers, radii):
    """
    Validate that circles don't overlap and are inside the unit square

    A packing is rejected when the inputs are inconsistently shaped, when any
    coordinate or radius is NaN, when any radius is negative, when a circle
    sticks out of [0, 1] x [0, 1], or when two circles overlap. Containment
    and overlap are checked with a 1e-6 tolerance for numerical error. The
    reason for a rejection is printed before returning.

    Args:
        centers: np.array of shape (n, 2) with (x, y) coordinates
        radii: np.array of shape (n) with radius of each circle

    Returns:
        True if valid, False otherwise
    """
    centers = np.asarray(centers, dtype=float)
    radii = np.asarray(radii, dtype=float)

    # The loops below index radii by circle number, so a longer radii array
    # would otherwise be validated only on its first n entries.
    if centers.ndim != 2 or centers.shape[1] != 2 or radii.shape != (centers.shape[0],):
        print(f"Inconsistent shapes: centers={centers.shape}, radii={radii.shape}")
        return False

    n = centers.shape[0]

    # NaN compares False against everything, so without these guards a NaN
    # coordinate or radius would pass every inequality test further down.
    if np.isnan(centers).any():
        print("NaN values detected in circle centers")
        return False
    if np.isnan(radii).any():
        print("NaN values detected in circle radii")
        return False

    # A negative radius satisfies both the containment and the overlap test,
    # so it has to be rejected explicitly.
    for i in range(n):
        if radii[i] < 0:
            print(f"Circle {i} has negative radius {radii[i]}")
            return False

    # Check if circles are inside the unit square
    for i in range(n):
        x, y = centers[i]
        r = radii[i]
        if x - r < -1e-6 or x + r > 1 + 1e-6 or y - r < -1e-6 or y + r > 1 + 1e-6:
            print(f"Circle { i } at ({ x } , { y } ) with radius { r } is outside the unit square")
            return False

    # Check for overlaps
    for i in range(n):
        for j in range(i + 1, n):
            dist = np.sqrt(np.sum((centers[i] - centers[j]) ** 2))
            if dist < radii[i] + radii[j] - 1e-6:  # Allow for tiny numerical errors
                print(f"Circles { i } and { j } overlap: dist={ dist } , r1+r2={ radii [i ]+ radii [j ]} ")
                return False

    return True
5256
57+
5358def run_with_timeout (program_path , timeout_seconds = 20 ):
5459 """
5560 Run the program in a separate process with timeout
5661 using a simple subprocess approach
57-
62+
5863 Args:
5964 program_path: Path to the program file
6065 timeout_seconds: Maximum execution time in seconds
61-
66+
6267 Returns:
6368 centers, radii, sum_radii tuple from the program
6469 """
6570 # Create a temporary file to execute
66- with tempfile .NamedTemporaryFile (suffix = ' .py' , delete = False ) as temp_file :
71+ with tempfile .NamedTemporaryFile (suffix = " .py" , delete = False ) as temp_file :
6772 # Write a script that executes the program and saves results
6873 script = f"""
6974import sys
@@ -111,124 +116,126 @@ def run_with_timeout(program_path, timeout_seconds=20):
111116"""
112117 temp_file .write (script .encode ())
113118 temp_file_path = temp_file .name
114-
119+
115120 results_path = f"{ temp_file_path } .results"
116-
121+
117122 try :
118123 # Run the script with timeout
119124 process = subprocess .Popen (
120- [sys .executable , temp_file_path ],
121- stdout = subprocess .PIPE ,
122- stderr = subprocess .PIPE
125+ [sys .executable , temp_file_path ], stdout = subprocess .PIPE , stderr = subprocess .PIPE
123126 )
124-
127+
125128 try :
126129 stdout , stderr = process .communicate (timeout = timeout_seconds )
127130 exit_code = process .returncode
128-
131+
129132 # Always print output for debugging purposes
130133 print (f"Subprocess stdout: { stdout .decode ()} " )
131134 if stderr :
132135 print (f"Subprocess stderr: { stderr .decode ()} " )
133-
136+
134137 # Still raise an error for non-zero exit codes, but only after printing the output
135138 if exit_code != 0 :
136139 raise RuntimeError (f"Process exited with code { exit_code } " )
137-
140+
138141 # Load the results
139142 if os .path .exists (results_path ):
140- with open (results_path , 'rb' ) as f :
143+ with open (results_path , "rb" ) as f :
141144 results = pickle .load (f )
142-
145+
143146 # Check if an error was returned
144- if ' error' in results :
147+ if " error" in results :
145148 raise RuntimeError (f"Program execution failed: { results ['error' ]} " )
146-
147- return results [' centers' ], results [' radii' ], results [' sum_radii' ]
149+
150+ return results [" centers" ], results [" radii" ], results [" sum_radii" ]
148151 else :
149152 raise RuntimeError ("Results file not found" )
150-
153+
151154 except subprocess .TimeoutExpired :
152155 # Kill the process if it times out
153156 process .kill ()
154157 process .wait ()
155158 raise TimeoutError (f"Process timed out after { timeout_seconds } seconds" )
156-
159+
157160 finally :
158161 # Clean up temporary files
159162 if os .path .exists (temp_file_path ):
160163 os .unlink (temp_file_path )
161164 if os .path .exists (results_path ):
162165 os .unlink (results_path )
163166
167+
164168def evaluate (program_path ):
165169 """
166170 Evaluate the program by running it once and checking the sum of radii
167-
171+
168172 Args:
169173 program_path: Path to the program file
170-
174+
171175 Returns:
172176 Dictionary of metrics
173177 """
174178 # Target value from the paper
175179 TARGET_VALUE = 2.635 # AlphaEvolve result for n=26
176-
180+
177181 try :
178182 # For constructor-based approaches, a single evaluation is sufficient
179183 # since the result is deterministic
180184 start_time = time .time ()
181-
185+
182186 # Use subprocess to run with timeout
183187 centers , radii , reported_sum = run_with_timeout (
184- program_path ,
185- timeout_seconds = 15 # Single timeout
188+ program_path , timeout_seconds = 15 # Single timeout
186189 )
187-
190+
188191 end_time = time .time ()
189192 eval_time = end_time - start_time
190-
193+
191194 # Ensure centers and radii are numpy arrays
192195 if not isinstance (centers , np .ndarray ):
193196 centers = np .array (centers )
194197 if not isinstance (radii , np .ndarray ):
195198 radii = np .array (radii )
196-
199+
197200 # Validate solution
198201 valid = validate_packing (centers , radii )
199-
202+
200203 # Check shape and size
201- shape_valid = ( centers .shape == (26 , 2 ) and radii .shape == (26 ,) )
204+ shape_valid = centers .shape == (26 , 2 ) and radii .shape == (26 ,)
202205 if not shape_valid :
203- print (f"Invalid shapes: centers={ centers .shape } , radii={ radii .shape } , expected (26, 2) and (26,)" )
206+ print (
207+ f"Invalid shapes: centers={ centers .shape } , radii={ radii .shape } , expected (26, 2) and (26,)"
208+ )
204209 valid = False
205-
210+
206211 # Calculate sum
207212 sum_radii = np .sum (radii ) if valid else 0.0
208-
213+
209214 # Make sure reported_sum matches the calculated sum
210215 if abs (sum_radii - reported_sum ) > 1e-6 :
211216 print (f"Warning: Reported sum { reported_sum } doesn't match calculated sum { sum_radii } " )
212-
217+
213218 # Target ratio (how close we are to the target)
214219 target_ratio = sum_radii / TARGET_VALUE if valid else 0.0
215-
220+
216221 # Validity score
217222 validity = 1.0 if valid else 0.0
218-
223+
219224 # Combined score - higher is better
220225 combined_score = target_ratio * validity
221-
222- print (f"Evaluation: valid={ valid } , sum_radii={ sum_radii :.6f} , target={ TARGET_VALUE } , ratio={ target_ratio :.6f} , time={ eval_time :.2f} s" )
223-
226+
227+ print (
228+ f"Evaluation: valid={ valid } , sum_radii={ sum_radii :.6f} , target={ TARGET_VALUE } , ratio={ target_ratio :.6f} , time={ eval_time :.2f} s"
229+ )
230+
224231 return {
225232 "sum_radii" : float (sum_radii ),
226233 "target_ratio" : float (target_ratio ),
227234 "validity" : float (validity ),
228235 "eval_time" : float (eval_time ),
229- "combined_score" : float (combined_score )
236+ "combined_score" : float (combined_score ),
230237 }
231-
238+
232239 except Exception as e :
233240 print (f"Evaluation failed completely: { str (e )} " )
234241 traceback .print_exc ()
@@ -237,9 +244,10 @@ def evaluate(program_path):
237244 "target_ratio" : 0.0 ,
238245 "validity" : 0.0 ,
239246 "eval_time" : 0.0 ,
240- "combined_score" : 0.0
247+ "combined_score" : 0.0 ,
241248 }
242249
250+
243251# Stage-based evaluation for cascade evaluation
244252def evaluate_stage1 (program_path ):
245253 """
@@ -248,55 +256,53 @@ def evaluate_stage1(program_path):
248256 try :
249257 # Use the simplified subprocess approach
250258 try :
251- centers , radii , sum_radii = run_with_timeout (
252- program_path ,
253- timeout_seconds = 10
254- )
255-
259+ centers , radii , sum_radii = run_with_timeout (program_path , timeout_seconds = 10 )
260+
256261 # Ensure centers and radii are numpy arrays
257262 if not isinstance (centers , np .ndarray ):
258263 centers = np .array (centers )
259264 if not isinstance (radii , np .ndarray ):
260265 radii = np .array (radii )
261-
266+
262267 # Validate solution (shapes and constraints)
263- shape_valid = ( centers .shape == (26 , 2 ) and radii .shape == (26 ,) )
268+ shape_valid = centers .shape == (26 , 2 ) and radii .shape == (26 ,)
264269 if not shape_valid :
265270 print (f"Invalid shapes: centers={ centers .shape } , radii={ radii .shape } " )
266271 return {"validity" : 0.0 , "error" : "Invalid shapes" }
267-
272+
268273 valid = validate_packing (centers , radii )
269-
274+
270275 # Calculate sum
271276 actual_sum = np .sum (radii ) if valid else 0.0
272-
277+
273278 # Target from paper
274279 target = 2.635
275-
280+
276281 # Simple combined score for stage 1
277282 combined_score = (actual_sum / target ) if valid else 0.0
278-
283+
279284 # Return evaluation metrics
280285 return {
281286 "validity" : 1.0 if valid else 0.0 ,
282287 "sum_radii" : float (actual_sum ),
283288 "target_ratio" : float (actual_sum / target if valid else 0.0 ),
284- "combined_score" : float (combined_score )
289+ "combined_score" : float (combined_score ),
285290 }
286-
291+
287292 except TimeoutError as e :
288293 print (f"Stage 1 evaluation timed out: { e } " )
289294 return {"validity" : 0.0 , "combined_score" : 0.0 , "error" : "Timeout" }
290295 except Exception as e :
291296 print (f"Stage 1 evaluation failed: { e } " )
292297 print (traceback .format_exc ())
293298 return {"validity" : 0.0 , "combined_score" : 0.0 , "error" : str (e )}
294-
299+
295300 except Exception as e :
296301 print (f"Stage 1 evaluation failed completely: { e } " )
297302 print (traceback .format_exc ())
298303 return {"validity" : 0.0 , "combined_score" : 0.0 , "error" : str (e )}
299304
305+
300306def evaluate_stage2 (program_path ):
301307 """
302308 Second stage evaluation - full evaluation
0 commit comments