Skip to content

Commit 37aaf9e

Browse files
committed
Add example
1 parent 1eec40d commit 37aaf9e

File tree

8 files changed

+845
-0
lines changed

8 files changed

+845
-0
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Online Judge Programming With Evaluation Objects Example
2+
3+
This example is a variant of the [online judge example](https://github.com/codelion/openevolve/tree/main/examples/online_judge_programming) demonstrating the use of evaluation objects. See its documentation for the problem description.
4+
5+
## Running the example
6+
7+
First, fill in your username and token in `example.kattisrc`, copying them from your personal configuration file on [Kattis](https://open.kattis.com/download/kattisrc) (you must be logged in), and rename the file to `.kattisrc`.
8+
9+
Then, to run this example:
10+
11+
```bash
12+
cd examples/online_judge_programming_with_eval_obj
13+
python main.py -p PROBLEM_SHORT_NAME  # short name of the Kattis problem to submit to
14+
```
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Configuration for the online judge programming example (evaluation-object variant)
2+
max_iterations: 20
3+
checkpoint_interval: 1
4+
log_level: "INFO"
5+
6+
# LLM configuration
7+
llm:
8+
primary_model: "gemini-2.0-flash"
9+
primary_model_weight: 0.6
10+
secondary_model: "gemini-2.5-flash-preview-05-20"
11+
secondary_model_weight: 0.4
12+
api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
13+
api_key: YOUR_API_KEY
14+
temperature: 0.7
15+
top_p: 0.95
16+
max_tokens: 4096
17+
18+
# Prompt configuration
19+
prompt:
20+
system_message: |
21+
You are an expert programmer. Your task is to implement an algorithm in Python to pass all the test cases. The problem is as follows:
22+
23+
A string of lowercase letters is called alphabetical if some of the letters can be deleted so that the only letters that remain are the letters from a to z in order. Given a string s, determine the minimum number of letters to add anywhere in the string to make it alphabetical.
24+
25+
Input:
26+
Each input will consist of a single test case. Note that your program may be run multiple times on different inputs. The only line of input contains a string s (1 ≤ |s| ≤ 50) which contains only lowercase letters.
27+
Output:
28+
Output a single integer, which is the smallest number of letters needed to add to s to make it alphabetical.
29+
30+
Sample Input 1:
31+
xyzabcdefghijklmnopqrstuvw
32+
Sample Output 1:
33+
3
34+
35+
Sample Input 2:
36+
aiemckgobjfndlhp
37+
Sample Output 2:
38+
20
39+
40+
Your program should always read/write to STDIN/STDOUT. For example, to handle integer input, use the following format:
41+
```
42+
import sys
43+
for line in sys.stdin:
44+
data = int(line)
45+
```
46+
Use print() for output. For example:
47+
```
48+
print("Hello, World!")
49+
```
50+
num_top_programs: 3
51+
use_template_stochasticity: true
52+
53+
# Database configuration
54+
database:
55+
population_size: 50
56+
archive_size: 20
57+
num_islands: 3
58+
elite_selection_ratio: 0.2
59+
exploitation_ratio: 0.7
60+
61+
# Evaluator configuration
62+
evaluator:
63+
timeout: 60
64+
cascade_evaluation: false
65+
cascade_thresholds: [1.0]
66+
parallel_evaluations: 4
67+
use_llm_feedback: false
68+
69+
# Evolution settings
70+
diff_based_evolution: true
71+
allow_full_rewrites: false
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""
2+
Evaluator for the online judge programming example (evaluation-object variant)
3+
"""
4+
5+
import re
6+
import subprocess
7+
import time
8+
import traceback
9+
10+
11+
def run_with_timeout(program_path, problem_name, timeout_seconds=60):
    """
    Submit a program to the online judge via ``submit.py`` and parse the summary.

    ``submit.py`` is launched as a child process (the script path is relative,
    so it is resolved against the current working directory) and must finish
    within ``timeout_seconds``.

    Args:
        program_path: Program to submit
        problem_name: Short name of the problem to submit to
        timeout_seconds: Timeout in seconds

    Returns:
        Tuple ``(score, done, correct, total)`` of integers parsed from the
        "Score", "Test cases done", "Test cases correct" and "Test cases total"
        lines of the submission output.

    Raises:
        TimeoutError: If the submission does not finish within the timeout.
        RuntimeError: If the submission process exits with a non-zero code.
        ValueError: If the expected summary lines are not found in the output.
    """
    import sys  # local import: only needed to locate the running interpreter

    # Run submit.py with the interpreter that runs this evaluator so it sees the
    # same installed packages; fall back to "python" if that path is unavailable.
    interpreter = sys.executable or "python"
    cmd = [interpreter, "submit.py", program_path, "-p", problem_name, "-l", "Python 3", "-f"]

    try:
        # subprocess.run kills the child and waits for it when the timeout
        # expires, so no process or pipe is left behind.
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired as exc:
        raise TimeoutError(f"Process timed out after {timeout_seconds} seconds") from exc

    exit_code = completed.returncode
    if exit_code != 0:
        print(completed.stderr)  # Print the error output if the command failed
        raise RuntimeError(f"Process exited with code {exit_code}")

    # The four summary lines must appear in this order, separated only by whitespace.
    pattern = (
        r"Score:\s*(\d+)\s*"
        r"Test cases done:\s*(\d+)\s*"
        r"Test cases correct:\s*(\d+)\s*"
        r"Test cases total:\s*(\d+)"
    )
    match = re.search(pattern, completed.stdout)
    if not match:
        raise ValueError("Expected summary lines not found")

    score, done, correct, total = map(int, match.groups())
    return score, done, correct, total
50+
51+
52+
class EvaluationObject:
    """
    Evaluator bound to one online-judge problem and a per-submission timeout.
    """

    def __init__(self, problem_name: str, timeout_seconds: int):
        """
        Store the evaluation settings.

        Args:
            problem_name: Short name of the problem to submit to
            timeout_seconds: Timeout in seconds for a single submission
        """
        self.timeout_seconds = timeout_seconds
        self.problem_name = problem_name

    def evaluate(self, program_path):
        """
        Submit the program to the online judge and report how well it performed.

        Any exception raised along the way is printed and converted into an
        all-zero metrics dictionary instead of being propagated.

        Args:
            program_path: Path to the program file

        Returns:
            Dictionary with the keys "score", "done", "correct", "total",
            "eval_time" (seconds spent on the submission) and "combined_score"
            (fraction of test cases answered correctly)
        """
        try:
            started_at = time.time()

            # The submission runs in a subprocess bounded by the configured timeout
            score, done, correct, total = run_with_timeout(
                program_path, self.problem_name, self.timeout_seconds
            )

            eval_time = time.time() - started_at

            # Fraction of correct test cases - higher is better
            if total > 0:
                combined_score = correct / total
            else:
                combined_score = 0.0

            print(
                f"Evaluation: Score={score}, Done={done}, Correct={correct}, Total={total}, Combined={combined_score:.2f}"
            )

            metrics = dict(score=score, done=done, correct=correct, total=total, eval_time=eval_time)
            metrics["combined_score"] = float(combined_score)
            return metrics

        except Exception as e:
            print(f"Evaluation failed completely: {str(e)}")
            traceback.print_exc()
            # Report a zeroed result so a failed submission still yields metrics
            failure = dict.fromkeys(("score", "done", "correct", "total"), 0)
            failure.update(eval_time=0.0, combined_score=0.0)
            return failure
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Please save this file as .kattisrc in your home directory.
2+
# This file includes a secret token that allows you to log in.
3+
# DO NOT SHARE IT WITH ANYONE ELSE.
4+
# If someone gets access to this token, please revoke it by changing your KATTIS password.
5+
6+
[user]
7+
username: YOUR_USERNAME
8+
token: YOUR_TOKEN
9+
10+
[kattis]
11+
hostname: open.kattis.com
12+
loginurl: https://open.kattis.com/login
13+
submissionurl: https://open.kattis.com/submit
14+
submissionsurl: https://open.kattis.com/submissions
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
"""Online judge programming example for OpenEvolve"""
2+
3+
# EVOLVE-BLOCK-START
4+
import sys
5+
6+
# Read standard input line by line; s keeps the stripped text of the last line read.
for line in sys.stdin:
    s = line.strip()

# Placeholder result: the seed program prints 0 without using s.
ans = 0
print(ans)
11+
12+
# EVOLVE-BLOCK-END
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from argparse import ArgumentParser
2+
3+
from openevolve import OpenEvolve
4+
from evaluator import EvaluationObject
5+
6+
if __name__ == "__main__":
    # Command-line entry point: parse the problem name and submission timeout,
    # then build the evaluation object and the OpenEvolve controller.
    parser = ArgumentParser()
    parser.add_argument(
        "-p",
        "--problem",
        help="Which problem to solve",
        # Without a problem name, None would be handed to the evaluator and
        # end up in the submit command, so fail early with a usage error.
        required=True,
    )
    parser.add_argument(
        "-t",
        "--timeout",
        help="Timeout for a single submission (in seconds)",
        type=int,
        default=60,
    )

    args = parser.parse_args()
    eval_obj = EvaluationObject(args.problem, args.timeout)
    # The evaluation-file argument is left empty; the evaluation object is passed instead.
    evolve = OpenEvolve("initial_program.py", "", eval_obj, "config.yaml")
    # NOTE(review): the controller is only constructed here; no run is started
    # in the lines visible in this file - confirm whether a run call is missing.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lxml
2+
requests

0 commit comments

Comments
 (0)