Commit 5cd3436

Reformatted code with $ black openevolve tests examples
1 parent 1f08698 commit 5cd3436
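
The reformatting described by this commit message can be reproduced or verified locally with Black. A minimal sketch, assuming Black is installed in the current environment; the target paths are the ones named in the commit message:

# Sketch: run Black in check mode over the same paths as in the commit message.
# --check only reports files that would be reformatted; --diff prints the changes
# without writing them.
import subprocess

result = subprocess.run(
    ["black", "--check", "--diff", "openevolve", "tests", "examples"],
    capture_output=True,
    text=True,
)
print(result.stdout or "Already formatted.")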

File tree

18 files changed: +736, -727 lines


examples/function_minimization/evaluator.py

Lines changed: 80 additions & 51 deletions
@@ -1,6 +1,7 @@
 """
 Evaluator for the function minimization example
 """
+
 import importlib.util
 import numpy as np
 import time
@@ -9,16 +10,17 @@
 import traceback
 import sys

+
 def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
     """
     Run a function with a timeout using concurrent.futures
-
+
     Args:
         func: Function to run
         args: Arguments to pass to the function
         kwargs: Keyword arguments to pass to the function
         timeout_seconds: Timeout in seconds
-
+
     Returns:
         Result of the function or raises TimeoutError
     """
@@ -27,7 +29,10 @@ def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
         try:
             return future.result(timeout=timeout_seconds)
         except concurrent.futures.TimeoutError:
-            raise TimeoutError(f"Function {func.__name__} timed out after {timeout_seconds} seconds")
+            raise TimeoutError(
+                f"Function {func.__name__} timed out after {timeout_seconds} seconds"
+            )
+

 def safe_float(value):
     """Convert a value to float safely"""
@@ -37,28 +42,29 @@ def safe_float(value):
         print(f"Warning: Could not convert {value} of type {type(value)} to float")
         return 0.0

+
 def evaluate(program_path):
     """
     Evaluate the program by running it multiple times and checking how close
     it gets to the known global minimum.
-
+
     Args:
         program_path: Path to the program file
-
+
     Returns:
         Dictionary of metrics
     """
     # Known global minimum (approximate)
     GLOBAL_MIN_X = -1.76
     GLOBAL_MIN_Y = -1.03
     GLOBAL_MIN_VALUE = -2.104
-
+
     try:
         # Load the program
         spec = importlib.util.spec_from_file_location("program", program_path)
         program = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(program)
-
+
         # Check if the required function exists
         if not hasattr(program, "run_search"):
             print(f"Error: program does not have 'run_search' function")
@@ -67,99 +73,111 @@ def evaluate(program_path):
                 "distance_score": 0.0,
                 "speed_score": 0.0,
                 "combined_score": 0.0,
-                "error": "Missing run_search function"
+                "error": "Missing run_search function",
             }
-
+
         # Run multiple trials
         num_trials = 10
         values = []
         distances = []
         times = []
         success_count = 0
-
+
         for trial in range(num_trials):
             try:
                 start_time = time.time()
-
+
                 # Run with timeout
                 result = run_with_timeout(program.run_search, timeout_seconds=5)
-
+
                 # Check if we got a tuple of 3 values
                 if not isinstance(result, tuple) or len(result) != 3:
-                    print(f"Trial {trial}: Invalid result format, expected tuple of 3 values but got {type(result)}")
+                    print(
+                        f"Trial {trial}: Invalid result format, expected tuple of 3 values but got {type(result)}"
+                    )
                     continue
-
+
                 x, y, value = result
-
+
                 end_time = time.time()
-
+
                 # Ensure all values are float
                 x = safe_float(x)
                 y = safe_float(y)
                 value = safe_float(value)
-
+
                 # Check if the result is valid (not NaN or infinite)
-                if (np.isnan(x) or np.isnan(y) or np.isnan(value) or
-                    np.isinf(x) or np.isinf(y) or np.isinf(value)):
+                if (
+                    np.isnan(x)
+                    or np.isnan(y)
+                    or np.isnan(value)
+                    or np.isinf(x)
+                    or np.isinf(y)
+                    or np.isinf(value)
+                ):
                     print(f"Trial {trial}: Invalid result, got x={x}, y={y}, value={value}")
                     continue
-
+
                 # Calculate metrics
                 x_diff = safe_float(x) - GLOBAL_MIN_X
                 y_diff = safe_float(y) - GLOBAL_MIN_Y
                 distance_to_global = np.sqrt(x_diff**2 + y_diff**2)
                 value_difference = abs(value - GLOBAL_MIN_VALUE)
-
+
                 values.append(float(value))
                 distances.append(float(distance_to_global))
                 times.append(float(end_time - start_time))
                 success_count += 1
-
+
             except TimeoutError as e:
                 print(f"Trial {trial}: {str(e)}")
                 continue
             except IndexError as e:
                 # Specifically handle IndexError which often happens with early termination checks
                 print(f"Trial {trial}: IndexError - {str(e)}")
-                print("This is likely due to a list index check before the list is fully populated.")
+                print(
+                    "This is likely due to a list index check before the list is fully populated."
+                )
                 continue
             except Exception as e:
                 print(f"Trial {trial}: Error - {str(e)}")
                 print(traceback.format_exc())
                 continue
-
+
         # If all trials failed, return zero scores
         if success_count == 0:
             return {
                 "value_score": 0.0,
                 "distance_score": 0.0,
                 "speed_score": 0.0,
                 "combined_score": 0.0,
-                "error": "All trials failed"
+                "error": "All trials failed",
             }
-
+
         # Calculate metrics
         avg_value = float(np.mean(values))
         avg_distance = float(np.mean(distances))
         avg_time = float(np.mean(times)) if times else 1.0
-
+
         # Convert to scores (higher is better)
         value_score = float(1.0 / (1.0 + abs(avg_value - GLOBAL_MIN_VALUE)))  # Normalize and invert
         distance_score = float(1.0 / (1.0 + avg_distance))
         speed_score = float(1.0 / avg_time) if avg_time > 0 else 0.0
-
+
         # Normalize speed score (so it doesn't dominate)
         speed_score = float(min(speed_score, 10.0) / 10.0)
-
+
         # Add reliability score based on success rate
         reliability_score = float(success_count / num_trials)
-
+
         # Calculate a single combined score that prioritizes finding good solutions
         # over secondary metrics like speed and reliability
         # Value and distance scores (quality of solution) get 90% of the weight
         # Speed and reliability get only 10% combined
-        combined_score = float(0.6 * value_score + 0.3 * distance_score + 0.05 * speed_score + 0.05 * reliability_score)
-
+        combined_score = float(
+            0.6 * value_score + 0.3 * distance_score + 0.05 * speed_score + 0.05 * reliability_score
+        )
+
         # Also compute an "overall" score that will be the primary metric for selection
         # This adds a bonus for finding solutions close to the global minimum
         # and heavily penalizes solutions that aren't finding the right region
@@ -169,18 +187,18 @@ def evaluate(program_path):
             solution_quality = 0.5
         else:  # Not finding the right region
             solution_quality = 0.1
-
+
         # Overall score is dominated by solution quality but also factors in the combined score
         overall_score = 0.8 * solution_quality + 0.2 * combined_score
-
+
         return {
             "value_score": value_score,
             "distance_score": distance_score,
             "speed_score": speed_score,
             "reliability_score": reliability_score,
             "combined_score": combined_score,
             "overall_score": overall_score,  # This will be the primary selection metric
-            "success_rate": reliability_score
+            "success_rate": reliability_score,
         }
     except Exception as e:
         print(f"Evaluation failed completely: {str(e)}")
@@ -190,75 +208,85 @@ def evaluate(program_path):
             "distance_score": 0.0,
             "speed_score": 0.0,
             "combined_score": 0.0,
-            "error": str(e)
+            "error": str(e),
         }

+
 # Stage-based evaluation for cascade evaluation
 def evaluate_stage1(program_path):
     """First stage evaluation with fewer trials"""
     # Known global minimum (approximate)
     GLOBAL_MIN_X = float(-1.76)
     GLOBAL_MIN_Y = float(-1.03)
     GLOBAL_MIN_VALUE = float(-2.104)
-
+
     # Quick check to see if the program runs without errors
     try:
         # Load the program
         spec = importlib.util.spec_from_file_location("program", program_path)
         program = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(program)
-
+
         # Check if the required function exists
         if not hasattr(program, "run_search"):
             print(f"Stage 1 validation: Program does not have 'run_search' function")
             return {"runs_successfully": 0.0, "error": "Missing run_search function"}
-
+
         try:
             # Run a single trial with timeout
             result = run_with_timeout(program.run_search, timeout_seconds=5)
-
+
             # Check if we got a tuple of 3 values
             if not isinstance(result, tuple) or len(result) != 3:
-                print(f"Stage 1: Invalid result format, expected tuple of 3 values but got {type(result)}")
+                print(
+                    f"Stage 1: Invalid result format, expected tuple of 3 values but got {type(result)}"
+                )
                 return {"runs_successfully": 0.0, "error": "Invalid result format"}
-
+
             x, y, value = result
-
+
             # Ensure all values are float
             x = safe_float(x)
             y = safe_float(y)
             value = safe_float(value)
-
+
             # Check if the result is valid
-            if np.isnan(x) or np.isnan(y) or np.isnan(value) or np.isinf(x) or np.isinf(y) or np.isinf(value):
+            if (
+                np.isnan(x)
+                or np.isnan(y)
+                or np.isnan(value)
+                or np.isinf(x)
+                or np.isinf(y)
+                or np.isinf(value)
+            ):
                 print(f"Stage 1 validation: Invalid result, got x={x}, y={y}, value={value}")
                 return {"runs_successfully": 0.5, "error": "Invalid result values"}
-
+
             # Calculate distance safely
             x_diff = float(x) - GLOBAL_MIN_X
             y_diff = float(y) - GLOBAL_MIN_Y
             distance = float(np.sqrt(x_diff**2 + y_diff**2))
-
+
             # Calculate value-based score
             value_score = float(1.0 / (1.0 + abs(value - GLOBAL_MIN_VALUE)))
             distance_score = float(1.0 / (1.0 + distance))
-
+
             # Calculate solution quality metric
             if distance < 1.0:  # Very close to the correct solution
                 solution_quality = 1.0
             elif distance < 3.0:  # In the right region
                 solution_quality = 0.5
             else:  # Not finding the right region
                 solution_quality = 0.1
-
+
             # Basic metrics with overall score
             return {
                 "runs_successfully": 1.0,
                 "value": float(value),
                 "distance": distance,
                 "value_score": value_score,
                 "distance_score": distance_score,
-                "overall_score": solution_quality  # This becomes a strong guiding metric
+                "overall_score": solution_quality,  # This becomes a strong guiding metric
             }
         except TimeoutError as e:
             print(f"Stage 1 evaluation timed out: {e}")
@@ -272,12 +300,13 @@ def evaluate_stage1(program_path):
             print(f"Stage 1 evaluation failed: {e}")
             print(traceback.format_exc())
             return {"runs_successfully": 0.0, "error": str(e)}
-
+
     except Exception as e:
         print(f"Stage 1 evaluation failed: {e}")
         print(traceback.format_exc())
         return {"runs_successfully": 0.0, "error": str(e)}

+
 def evaluate_stage2(program_path):
     """Second stage evaluation with more thorough testing"""
     # Full evaluation as in the main evaluate function
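
As a reading aid for the scoring logic in the evaluator above, a minimal sketch of the combined and overall score arithmetic, using hypothetical trial results (the weights and thresholds mirror the code in this diff):

# Hypothetical per-run metrics, purely for illustration.
value_score = 0.9        # 1 / (1 + |avg_value - GLOBAL_MIN_VALUE|)
distance_score = 0.8     # 1 / (1 + avg_distance)
speed_score = 0.5        # min(1 / avg_time, 10) / 10
reliability_score = 1.0  # success_count / num_trials

# Solution quality (value + distance) carries 90% of the weight;
# speed and reliability share the remaining 10%.
combined_score = (
    0.6 * value_score
    + 0.3 * distance_score
    + 0.05 * speed_score
    + 0.05 * reliability_score
)  # -> 0.855

solution_quality = 1.0  # distance < 1.0: very close to the global minimum
overall_score = 0.8 * solution_quality + 0.2 * combined_score  # -> 0.971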
