Add example

tonyxty · tonyxty · commit 37aaf9eec6e4 · 2025-08-15T07:34:07.000+08:00
diff --git a/examples/online_judge_programming_with_eval_obj/README.md b/examples/online_judge_programming_with_eval_obj/README.md
@@ -0,0 +1,14 @@
+# Online Judge Programming With Evaluation Objects Example
+
+This example is a variant of the [online judge example](https://github.com/codelion/openevolve/tree/main/examples/online_judge_programming) demonstrating the use of evaluation objects. See its documentation for the problem description.
+
+## Running the example
+
+First, fill in your username and token in `example.kattisrc`, using the values from your personal configuration file on [Kattis](https://open.kattis.com/download/kattisrc) (you must be logged in to download it), and rename the file to `.kattisrc`.
+
+Then, to run this example:
+
+```bash
+cd examples/online_judge_programming_with_eval_obj
+python main.py -p PROBLEM_SHORT_NAME
+```
diff --git a/examples/online_judge_programming_with_eval_obj/config.yaml b/examples/online_judge_programming_with_eval_obj/config.yaml
@@ -0,0 +1,71 @@
+# Configuration for the online judge programming (evaluation object) example
+max_iterations: 20
+checkpoint_interval: 1
+log_level: "INFO"
+
+# LLM configuration
+llm:
+  primary_model: "gemini-2.0-flash"
+  primary_model_weight: 0.6
+  secondary_model: "gemini-2.5-flash-preview-05-20"
+  secondary_model_weight: 0.4
+  api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
+  api_key: YOUR_API_KEY
+  temperature: 0.7
+  top_p: 0.95
+  max_tokens: 4096
+
+# Prompt configuration
+prompt:
+  system_message: |
+    You are an expert programmer. Your task is to implement an algorithm in Python to pass all the test cases. The problem is as follows:
+
+    A string of lowercase letters is called alphabetical if some of the letters can be deleted so that the only letters that remain are the letters from a to z in order. Given a string s, determine the minimum number of letters to add anywhere in the string to make it alphabetical.
+
+    Input:
+    Each input will consist of a single test case. Note that your program may be run multiple times on different inputs. The only line of input contains a string s (1 ≤ |s| ≤ 50) which contains only lowercase letters.
+    Output:
+    Output a single integer, which is the smallest number of letters needed to add to s to make it alphabetical.
+
+    Sample Input 1:
+    xyzabcdefghijklmnopqrstuvw
+    Sample Output 1:
+    3
+
+    Sample Input 2:
+    aiemckgobjfndlhp
+    Sample Output 2:
+    20
+
+    Your program should always read/write to STDIN/STDOUT. For example, to handle integer input, use the following format:
+    ```
+    import sys
+    for line in sys.stdin:
+        data = int(line)
+    ```
+    Use print() for output. For example:
+    ```
+    print("Hello, World!")
+    ```
+  num_top_programs: 3
+  use_template_stochasticity: true
+
+# Database configuration
+database:
+  population_size: 50
+  archive_size: 20
+  num_islands: 3
+  elite_selection_ratio: 0.2
+  exploitation_ratio: 0.7
+
+# Evaluator configuration
+evaluator:
+  timeout: 60
+  cascade_evaluation: false
+  cascade_thresholds: [1.0]
+  parallel_evaluations: 4
+  use_llm_feedback: false
+
+# Evolution settings
+diff_based_evolution: true
+allow_full_rewrites: false
diff --git a/examples/online_judge_programming_with_eval_obj/evaluator.py b/examples/online_judge_programming_with_eval_obj/evaluator.py
@@ -0,0 +1,106 @@
+"""
+Evaluator for the online judge programming example (evaluation object variant)
+"""
+
+import re
+import subprocess
+import time
+import traceback
+
+
+def run_with_timeout(program_path, problem_name, timeout_seconds=60):
+    """
+    Run a function with a timeout using subprocess.
+
+    Args:
+        program_path: Program to submit
+        problem_name: Short name of the problem to submit to
+        timeout_seconds: Timeout in seconds
+
+    Returns:
+        Result of the function or raises TimeoutError
+    """
+    cmd = ["python", "submit.py", program_path, "-p", problem_name, "-l", "Python 3", "-f"]
+
+    try:
+        # Run the command and grab its output using subprocess.Popen
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        stdout, stderr = proc.communicate(timeout=timeout_seconds)
+        exit_code = proc.returncode
+        if exit_code != 0:
+            print(stderr)  # Print the error output if the command failed
+            raise RuntimeError(f"Process exited with code {exit_code}")
+    except subprocess.TimeoutExpired:
+        # Kill the process if it times out
+        proc.kill()
+        raise TimeoutError(f"Process timed out after {timeout_seconds} seconds")
+
+    pattern = (
+        r"Score:\s*(\d+)\s*"
+        r"Test cases done:\s*(\d+)\s*"
+        r"Test cases correct:\s*(\d+)\s*"
+        r"Test cases total:\s*(\d+)"
+    )
+    match = re.search(pattern, stdout)
+    if not match:
+        raise ValueError("Expected summary lines not found")
+
+    score, done, correct, total = map(int, match.groups())
+    return score, done, correct, total
+
+
+class EvaluationObject:
+    def __init__(self, problem_name: str, timeout_seconds: int):
+        self.problem_name = problem_name
+        self.timeout_seconds = timeout_seconds
+
+    def evaluate(self, program_path):
+        """
+        Evaluate the program by submitting it to OJ and fetching metrics based on how well it performs.
+
+        Args:
+            program_path: Path to the program file
+
+        Returns:
+            Dictionary of metrics
+        """
+        try:
+            # For constructor-based approaches, a single evaluation is sufficient
+            # since the result is deterministic
+            start_time = time.time()
+
+            # Use subprocess to run with timeout
+            score, done, correct, total = run_with_timeout(
+                program_path, self.problem_name, self.timeout_seconds
+            )
+
+            end_time = time.time()
+            eval_time = end_time - start_time
+
+            # Combined score - higher is better
+            combined_score = correct / total if total > 0 else 0.0
+
+            print(
+                f"Evaluation: Score={score}, Done={done}, Correct={correct}, Total={total}, Combined={combined_score:.2f}"
+            )
+
+            return {
+                "score": score,
+                "done": done,
+                "correct": correct,
+                "total": total,
+                "eval_time": eval_time,
+                "combined_score": float(combined_score),
+            }
+
+        except Exception as e:
+            print(f"Evaluation failed completely: {str(e)}")
+            traceback.print_exc()
+            return {
+                "score": 0,
+                "done": 0,
+                "correct": 0,
+                "total": 0,
+                "eval_time": 0.0,
+                "combined_score": 0.0,
+            }
diff --git a/examples/online_judge_programming_with_eval_obj/example.kattisrc b/examples/online_judge_programming_with_eval_obj/example.kattisrc
@@ -0,0 +1,14 @@
+# Please save this file as .kattisrc in your home directory.
+# This file includes a secret token that allows you to log in.
+# DO NOT SHARE IT WITH ANYONE ELSE.
+# If someone gets access to this token, please revoke it by changing your KATTIS password.
+
+[user]
+username: YOUR_USERNAME
+token: YOUR_TOKEN
+
+[kattis]
+hostname: open.kattis.com
+loginurl: https://open.kattis.com/login
+submissionurl: https://open.kattis.com/submit
+submissionsurl: https://open.kattis.com/submissions
diff --git a/examples/online_judge_programming_with_eval_obj/initial_program.py b/examples/online_judge_programming_with_eval_obj/initial_program.py
@@ -0,0 +1,12 @@
+"""Online judge programming example for OpenEvolve"""
+
+# EVOLVE-BLOCK-START
+import sys
+
+for line in sys.stdin:
+    s = line.strip()  # only the last input line is kept once the loop ends
+
+ans = 0  # placeholder answer: the input string is not used yet
+print(ans)
+
+# EVOLVE-BLOCK-END
diff --git a/examples/online_judge_programming_with_eval_obj/main.py b/examples/online_judge_programming_with_eval_obj/main.py
@@ -0,0 +1,23 @@
+from argparse import ArgumentParser
+
+from openevolve import OpenEvolve
+from evaluator import EvaluationObject
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument(
+        "-p",
+        "--problem",
+        help="Which problem to solve",
+    )
+    parser.add_argument(
+        "-t",
+        "--timeout",
+        help="Timeout for a single submission (in seconds)",
+        type=int,
+        default=60,
+    )
+
+    args = parser.parse_args()
+    eval_obj = EvaluationObject(args.problem, args.timeout)
+    evolve = OpenEvolve("initial_program.py", "", eval_obj, "config.yaml")
diff --git a/examples/online_judge_programming_with_eval_obj/requirements.txt b/examples/online_judge_programming_with_eval_obj/requirements.txt
@@ -0,0 +1,2 @@
+lxml
+requests
diff --git a/examples/online_judge_programming_with_eval_obj/submit.py b/examples/online_judge_programming_with_eval_obj/submit.py