diff --git a/examples/online_judge_programming_with_eval_obj/README.md b/examples/online_judge_programming_with_eval_obj/README.md
new file mode 100644
index 000000000..69c2a2856
--- /dev/null
+++ b/examples/online_judge_programming_with_eval_obj/README.md
@@ -0,0 +1,14 @@
+# Online Judge Programming With Evaluation Objects Example
+
+This example is a variant of the [online judge example](https://github.com/codelion/openevolve/tree/main/examples/online_judge_programming) demonstrating the use of evaluation objects. See its documentation for the problem description.
+
+## Running the example
+
+First, download your personal configuration file from [Kattis](https://open.kattis.com/download/kattisrc) (you must be logged in), copy the username and token from it into `example.kattisrc`, and rename that file to `.kattisrc`.
+
+Then, to run this example (replace `<problem_id>` with the short name of the Kattis problem you want to solve):
+
+```bash
+cd examples/online_judge_programming_with_eval_obj
+python main.py -p <problem_id>
+```
diff --git a/examples/online_judge_programming_with_eval_obj/config.yaml b/examples/online_judge_programming_with_eval_obj/config.yaml
new file mode 100644
index 000000000..7f640eb7c
--- /dev/null
+++ b/examples/online_judge_programming_with_eval_obj/config.yaml
@@ -0,0 +1,71 @@
+# Configuration for the online judge programming (evaluation objects) example
+max_iterations: 20
+checkpoint_interval: 1
+log_level: "INFO"
+
+# LLM configuration
+llm:
+  primary_model: "gemini-2.0-flash"
+  primary_model_weight: 0.6
+  secondary_model: "gemini-2.5-flash-preview-05-20"
+  secondary_model_weight: 0.4
+  api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
+  api_key: YOUR_API_KEY
+  temperature: 0.7
+  top_p: 0.95
+  max_tokens: 4096
+
+# Prompt configuration
+prompt:
+  system_message: |
+    You are an expert programmer. Your task is to implement an algorithm in Python to pass all the test cases. The problem is as follows:
+
+    A string of lowercase letters is called alphabetical if some of the letters can be deleted so that the only letters that remain are the letters from a to z in order. Given a string s, determine the minimum number of letters to add anywhere in the string to make it alphabetical.
+
+    Input:
+    Each input will consist of a single test case. Note that your program may be run multiple times on different inputs. The only line of input contains a string s (1 ≤ |s| ≤ 50) which contains only lowercase letters.
+    Output:
+    Output a single integer, which is the smallest number of letters needed to add to s to make it alphabetical.
+
+    Sample Input 1:
+    xyzabcdefghijklmnopqrstuvw
+    Sample Output 1:
+    3
+
+    Sample Input 2:
+    aiemckgobjfndlhp
+    Sample Output 2:
+    20
+
+    Your program should always read/write to STDIN/STDOUT. For example, to handle integer input, use the following format:
+    ```
+    import sys
+    for line in sys.stdin:
+        data = int(line)
+    ```
+    Use print() for output. For example:
+    ```
+    print("Hello, World!")
+    ```
+  num_top_programs: 3
+  use_template_stochasticity: true
+
+# Database configuration
+database:
+  population_size: 50
+  archive_size: 20
+  num_islands: 3
+  elite_selection_ratio: 0.2
+  exploitation_ratio: 0.7
+
+# Evaluator configuration
+evaluator:
+  timeout: 60
+  cascade_evaluation: false
+  cascade_thresholds: [1.0]
+  parallel_evaluations: 4
+  use_llm_feedback: false
+
+# Evolution settings
+diff_based_evolution: true
+allow_full_rewrites: false
diff --git a/examples/online_judge_programming_with_eval_obj/evaluator.py b/examples/online_judge_programming_with_eval_obj/evaluator.py
new file mode 100644
index 000000000..1e5954b23
--- /dev/null
+++ b/examples/online_judge_programming_with_eval_obj/evaluator.py
@@ -0,0 +1,106 @@
+"""
+Evaluator for the online judge programming example (evaluation object variant)
+"""
+
+import re
+import subprocess
+import time
+import traceback
+
+
+def run_with_timeout(program_path, problem_name, timeout_seconds=60):
+    """
+    Submit a program to the online judge via submit.py, with a timeout.
+
+    Args:
+        program_path: Program to submit
+        problem_name: Short name of the problem to submit to
+        timeout_seconds: Timeout in seconds
+
+    Returns:
+        Tuple of (score, done, correct, total); raises TimeoutError or RuntimeError on failure
+    """
+    cmd = ["python", "submit.py", program_path, "-p", problem_name, "-l", "Python 3", "-f"]
+
+    try:
+        # Run the command and grab its output using subprocess.Popen
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        stdout, stderr = proc.communicate(timeout=timeout_seconds)
+        exit_code = proc.returncode
+        if exit_code != 0:
+            print(stderr)  # Print the error output if the command failed
+            raise RuntimeError(f"Process exited with code {exit_code}")
+    except subprocess.TimeoutExpired:
+        # Kill the process if it times out
+        proc.kill()
+        raise TimeoutError(f"Process timed out after {timeout_seconds} seconds")
+
+    pattern = (
+        r"Score:\s*(\d+)\s*"
+        r"Test cases done:\s*(\d+)\s*"
+        r"Test cases correct:\s*(\d+)\s*"
+        r"Test cases total:\s*(\d+)"
+    )
+    match = re.search(pattern, stdout)
+    if not match:
+        raise ValueError("Expected summary lines not found")
+
+    score, done, correct, total = map(int, match.groups())
+    return score, done, correct, total
+
+
+class EvaluationObject:
+    def __init__(self, problem_name: str, timeout_seconds: int):
+        self.problem_name = problem_name
+        self.timeout_seconds = timeout_seconds
+
+    def evaluate(self, program_path):
+        """
+        Evaluate the program by submitting it to the online judge and fetching metrics based on how well it performs.
+
+        Args:
+            program_path: Path to the program file
+
+        Returns:
+            Dictionary of metrics
+        """
+        try:
+            # A single submission per evaluation is sufficient,
+            # since the judge's verdict for a given program is deterministic
+            start_time = time.time()
+
+            # Use subprocess to run with timeout
+            score, done, correct, total = run_with_timeout(
+                program_path, self.problem_name, self.timeout_seconds
+            )
+
+            end_time = time.time()
+            eval_time = end_time - start_time
+
+            # Combined score - higher is better
+            combined_score = correct / total if total > 0 else 0.0
+
+            print(
+                f"Evaluation: Score={score}, Done={done}, Correct={correct}, Total={total}, Combined={combined_score:.2f}"
+            )
+
+            return {
+                "score": score,
+                "done": done,
+                "correct": correct,
+                "total": total,
+                "eval_time": eval_time,
+                "combined_score": float(combined_score),
+            }
+
+        except Exception as e:
+            print(f"Evaluation failed completely: {str(e)}")
+            traceback.print_exc()
+            return {
+                "score": 0,
+                "done": 0,
+                "correct": 0,
+                "total": 0,
+                "eval_time": 0.0,
+                "combined_score": 0.0,
+            }
diff --git a/examples/online_judge_programming_with_eval_obj/example.kattisrc b/examples/online_judge_programming_with_eval_obj/example.kattisrc
new file mode 100644
index 000000000..249859bde
--- /dev/null
+++ b/examples/online_judge_programming_with_eval_obj/example.kattisrc
@@ -0,0 +1,14 @@
+# Please save this file as .kattisrc in your home directory.
+# This file includes a secret token that allows you to log in.
+# DO NOT SHARE IT WITH ANYONE ELSE.
+# If someone gets access to this token, please revoke it by changing your KATTIS password.
+
+[user]
+username: YOUR_USERNAME
+token: YOUR_TOKEN
+
+[kattis]
+hostname: open.kattis.com
+loginurl: https://open.kattis.com/login
+submissionurl: https://open.kattis.com/submit
+submissionsurl: https://open.kattis.com/submissions
diff --git a/examples/online_judge_programming_with_eval_obj/initial_program.py b/examples/online_judge_programming_with_eval_obj/initial_program.py
new file mode 100644
index 000000000..936b6845e
--- /dev/null
+++ b/examples/online_judge_programming_with_eval_obj/initial_program.py
@@ -0,0 +1,12 @@
+"""Online judge programming example for OpenEvolve"""
+
+# EVOLVE-BLOCK-START
+import sys
+
+for line in sys.stdin:
+    s = line.strip()
+
+ans = 0
+print(ans)
+
+# EVOLVE-BLOCK-END
diff --git a/examples/online_judge_programming_with_eval_obj/main.py b/examples/online_judge_programming_with_eval_obj/main.py
new file mode 100644
index 000000000..b86f44b25
--- /dev/null
+++ b/examples/online_judge_programming_with_eval_obj/main.py
@@ -0,0 +1,25 @@
+import asyncio
+from argparse import ArgumentParser
+
+from openevolve import OpenEvolve
+from evaluator import EvaluationObject
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument(
+        "-p",
+        "--problem",
+        help="Which problem to solve",
+    )
+    parser.add_argument(
+        "-t",
+        "--timeout",
+        help="Timeout for a single submission (in seconds)",
+        type=int,
+        default=60,
+    )
+
+    args = parser.parse_args()
+    eval_obj = EvaluationObject(args.problem, args.timeout)
+    evolve = OpenEvolve("initial_program.py", "", eval_obj, "config.yaml")
+    asyncio.run(evolve.run())
\ No newline at end of file
diff --git a/examples/online_judge_programming_with_eval_obj/requirements.txt b/examples/online_judge_programming_with_eval_obj/requirements.txt
new file mode 100644
index 000000000..7cdc7eaad
--- /dev/null
+++ b/examples/online_judge_programming_with_eval_obj/requirements.txt
@@ -0,0 +1,2 @@
+lxml
+requests
\ No newline at end of file
diff --git 
a/examples/online_judge_programming_with_eval_obj/submit.py b/examples/online_judge_programming_with_eval_obj/submit.py new file mode 100755 index 000000000..c2799824a --- /dev/null +++ b/examples/online_judge_programming_with_eval_obj/submit.py @@ -0,0 +1,603 @@ +#!/usr/bin/env python +import argparse +import configparser +import os +import re +import requests +import sys +import time + +from lxml.html import fragment_fromstring + +_DEFAULT_CONFIG = "/usr/local/etc/kattisrc" +_LANGUAGE_GUESS = { + ".4th": "Forth", + ".c": "C", + ".c++": "C++", + ".cc": "C++", + ".c#": "C#", + ".cpp": "C++", + ".cs": "C#", + ".cxx": "C++", + ".cbl": "COBOL", + ".cob": "COBOL", + ".cpy": "COBOL", + ".forth": "Forth", + ".frt": "Forth", + ".fs": "F#", + ".fth": "Forth", + ".go": "Go", + ".hs": "Haskell", + ".java": "Java", + ".js": "JavaScript (Node.js)", + ".ts": "TypeScript", + ".kt": "Kotlin", + ".lisp": "Common Lisp", + ".cl": "Common Lisp", + ".m": "Objective-C", + ".ml": "OCaml", + ".pas": "Pascal", + ".php": "PHP", + ".pl": "Prolog", + ".py": "Python 3", + ".pyc": "Python 3", + ".rb": "Ruby", + ".rkt": "Racket", + ".rs": "Rust", + ".scala": "Scala", + ".f90": "Fortran", + ".f": "Fortran", + ".for": "Fortran", + ".sh": "Bash", + ".apl": "APL", + ".ss": "Gerbil", + ".jl": "Julia", + ".vb": "Visual Basic", + ".dart": "Dart", + ".zig": "Zig", + ".swift": "Swift", + ".nim": "Nim", + ".lua": "Lua", + ".pm": "Perl", + ".sno": "SNOBOL", + ".odin": "Odin", + ".a68": "Algol 68", + ".cr": "Crystal", + ".sim": "Simula 67", + ".d": "D", + ".mod": "Modula-2", + ".st": "Smalltalk", + ".adb": "Ada", + ".ads": "Ada", + ".erl": "Erlang", + ".ex": "Elixir", +} + +_GUESS_MAINCLASS = {"Elixir", "Erlang", "Java", "Kotlin", "Modula-2", "Scala"} +_GUESS_MAINFILE = { + "Ada", + "Algol 68", + "APL", + "Bash", + "Crystal", + "Dart", + "Forth", + "Gerbil", + "JavaScript (Node.js)", + "JavaScript (SpiderMonkey)", + "Julia", + "Common Lisp", + "Lua", + "Nim", + "Octave", + "Pascal", + "Perl", + "PHP", + "Python 2", + "Python 3", + "Racket", + "Ruby", + "Rust", + "Simula", + "Smalltalk", + "SNOBOL", + "TypeScript", + "Zig", +} + +_HEADERS = {"User-Agent": "kattis-cli-submit"} + +_RUNNING_STATUS = 5 +_COMPILE_ERROR_STATUS = 8 +_ACCEPTED_STATUS = 16 +_STATUS_MAP = { + 0: "New", # + 1: "New", + 2: "Waiting for compile", + 3: "Compiling", + 4: "Waiting for run", + _RUNNING_STATUS: "Running", + 6: "Judge Error", + # 7: '', + _COMPILE_ERROR_STATUS: "Compile Error", + 9: "Run Time Error", + 10: "Memory Limit Exceeded", + 11: "Output Limit Exceeded", + 12: "Time Limit Exceeded", + 13: "Illegal Function", + 14: "Wrong Answer", + # 15: '', + _ACCEPTED_STATUS: "Accepted", +} + + +class ConfigError(Exception): + pass + + +def get_url(cfg, option, default): + if cfg.has_option("kattis", option): + return cfg.get("kattis", option) + else: + hostname = cfg.get("kattis", "hostname") + return f"https://{hostname}/{default}" + + +def get_config(): + """Returns a ConfigParser object for the .kattisrc file(s)""" + cfg = configparser.ConfigParser() + if os.path.exists(_DEFAULT_CONFIG): + cfg.read(_DEFAULT_CONFIG) + + try: + file = __file__ + except NameError: + file = sys.argv[0] + + if not cfg.read( + [ + os.path.join(os.path.expanduser("~"), ".kattisrc"), + os.path.join(os.path.dirname(file), ".kattisrc"), + os.path.join(os.path.dirname(os.path.realpath(file)), ".kattisrc"), + ] + ): + raise ConfigError( + """\ +I failed to read in a config file from your home directory or from the +same directory as this script. 
To download a .kattisrc file please visit +https:///download/kattisrc + +The file should look something like this: +[user] +username: yourusername +token: ********* + +[kattis] +hostname: +loginurl: https:///login +submissionurl: https:///submit +submissionsurl: https:///submissions""" + ) + return cfg + + +def is_python2(files): + python2 = re.compile(r"^\s*\bprint\b *[^ \(\),\]]|\braw_input\b") + for filename in files: + try: + with open(filename) as f: + for index, line in enumerate(f): + if index == 0 and line.startswith("#!"): + if "python2" in line: + return True + if "python3" in line: + return False + if python2.search(line.split("#")[0]): + return True + except UnicodeDecodeError: + pass + except IOError: + return False + return False + + +def guess_language(ext, files): + if ext == ".C": + return "C++" + ext = ext.lower() + if ext == ".h": + if any(f.endswith(".c") for f in files): + return "C" + else: + return "C++" + if ext == ".py": + if is_python2(files): + return "Python 2" + else: + return "Python 3" + return _LANGUAGE_GUESS.get(ext, None) + + +def guess_mainfile(language, files): + for filename in files: + if os.path.splitext(os.path.basename(filename))[0] in ["main", "Main"]: + return filename + for filename in files: + try: + with open(filename) as f: + conts = f.read() + if language in ["Java", "Rust", "Scala", "Kotlin"] and re.search( + r" main\s*\(", conts + ): + return filename + if language == "Pascal" and re.match(r"^\s*[Pp]rogram\b", conts): + return filename + except UnicodeDecodeError: + pass + except IOError: + pass + return files[0] + + +def guess_mainclass(language, files): + if language in _GUESS_MAINFILE and len(files) > 1: + return os.path.basename(guess_mainfile(language, files)) + if language in _GUESS_MAINCLASS: + mainfile = os.path.basename(guess_mainfile(language, files)) + name = os.path.splitext(mainfile)[0] + if language == "Kotlin": + return name[0].upper() + name[1:] + "Kt" + return name + return None + + +def login(login_url, username, password=None, token=None): + """Log in to Kattis. + + At least one of password or token needs to be provided. + + Returns a requests.Response with cookies needed to be able to submit + """ + login_args = {"user": username, "script": "true"} + if password: + login_args["password"] = password + if token: + login_args["token"] = token + + return requests.post(login_url, data=login_args, headers=_HEADERS) + + +def login_from_config(cfg): + """Log in to Kattis using the access information in a kattisrc file + + Returns a requests.Response with cookies needed to be able to submit + """ + username = cfg.get("user", "username") + password = token = None + try: + password = cfg.get("user", "password") + except configparser.NoOptionError: + pass + try: + token = cfg.get("user", "token") + except configparser.NoOptionError: + pass + if password is None and token is None: + raise ConfigError( + """\ +Your .kattisrc file appears corrupted. It must provide a token (or a +KATTIS password). + +Please download a new .kattisrc file""" + ) + + loginurl = get_url(cfg, "loginurl", "login") + return login(loginurl, username, password, token) + + +def submit( + submit_url, + cookies, + problem, + language, + files, + mainclass="", + tag="", + assignment=None, + contest=None, +): + """Make a submission. 
+ + The url_opener argument is an OpenerDirector object to use (as + returned by the login() function) + + Returns the requests.Result from the submission + """ + + data = { + "submit": "true", + "submit_ctr": 2, + "language": language, + "mainclass": mainclass, + "problem": problem, + "tag": tag, + "script": "true", + } + + if assignment is not None: + data["assignment"] = assignment + if contest is not None: + data["contest"] = contest + sub_files = [] + for f in files: + with open(f, "rb") as sub_file: + sub_files.append( + ("sub_file[]", (os.path.basename(f), sub_file.read(), "application/octet-stream")) + ) + + return requests.post(submit_url, data=data, files=sub_files, cookies=cookies, headers=_HEADERS) + + +def confirm_or_die(problem, language, files, mainclass, tag): + print("Problem:", problem) + print("Language:", language) + print("Files:", ", ".join(files)) + if mainclass: + if language in _GUESS_MAINFILE: + print("Main file:", mainclass) + else: + print("Mainclass:", mainclass) + if tag: + print("Tag:", tag) + print("Submit (y/N)?") + if sys.stdin.readline().upper()[:-1] != "Y": + print("Cancelling") + sys.exit(1) + + +def get_submission_url(submit_response, cfg): + m = re.search(r"Submission ID: (\d+)", submit_response) + if m: + submissions_url = get_url(cfg, "submissionsurl", "submissions") + submission_id = m.group(1) + return f"{submissions_url}/{submission_id}" + + +def get_submission_status(submission_url, cookies): + reply = requests.get(submission_url + "?json", cookies=cookies, headers=_HEADERS) + return reply.json() + + +_RED_COLOR = 31 +_GREEN_COLOR = 32 +_YELLOW_COLOR = 33 + + +def color(s, c): + return f"\x1b[{c}m{s}\x1b[0m" + + +def show_judgement(submission_url, cfg): + login_reply = login_from_config(cfg) + while True: + status = get_submission_status(submission_url, login_reply.cookies) + status_id = status["status_id"] + testcases_done = status["testcase_index"] + testcases_total = status["row_html"].count(" _RUNNING_STATUS: + # Done + print() + success = status_id == _ACCEPTED_STATUS + try: + root = fragment_fromstring(status["row_html"], create_parent=True) + cpu_time = root.xpath('.//*[@data-type="cpu"]')[0].text_content() + try: + score = re.findall( + r"\(([\d\.]+)\)", root.xpath('.//*[@data-type="status"]')[0].text_content() + )[0] + except: + score = "" + status_text += ( + " (" + cpu_time + ", " + score + ")" if score else " (" + cpu_time + ")" + ) + except: + pass + if status_id != _COMPILE_ERROR_STATUS: + print(color(status_text, _GREEN_COLOR if success else _RED_COLOR)) + numerical_score = int(score) if score else 0 + return success, numerical_score, testcases_done, testcases_correct, testcases_total + + time.sleep(0.25) + + +def main(): + parser = argparse.ArgumentParser(prog="kattis", description="Submit a solution to Kattis") + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-a", + "--assignment", + help="""Short name of assignment you want to submit to +Overrides default guess (server guesses based on assignments you are in)""", + ) + group.add_argument( + "-c", + "--contest", + help="""Short name of contest you want to submit to +Overrides default guess (server guesses based on contests you are in)""", + ) + parser.add_argument( + "-p", + "--problem", + help="""'Which problem to submit to. +Overrides default guess (first part of first filename)""", + ) + parser.add_argument( + "-m", + "--mainclass", + help="""Sets mainclass. 
+Overrides default guess (first part of first filename)""", + ) + parser.add_argument( + "-l", + "--language", + help="""Sets language. +Overrides default guess (based on suffix of first filename)""", + ) + parser.add_argument("-t", "--tag", help=argparse.SUPPRESS) + parser.add_argument( + "-f", "--force", help="Force, no confirmation prompt before submission", action="store_true" + ) + parser.add_argument("files", nargs="+") + + args = parser.parse_args() + files = args.files + + try: + cfg = get_config() + except ConfigError as exc: + print(exc) + sys.exit(1) + + problem, ext = os.path.splitext(os.path.basename(files[0])) + language = guess_language(ext, files) + mainclass = guess_mainclass(language, files) + tag = args.tag + + problem = problem.lower() + + if args.problem: + problem = args.problem + + if args.mainclass is not None: + mainclass = args.mainclass + + if args.language: + language = args.language + + if language is None: + print( + f'''\ +No language specified, and I failed to guess language from filename +extension "{ext}"''' + ) + sys.exit(1) + + files = sorted(list(set(args.files))) + + try: + login_reply = login_from_config(cfg) + except ConfigError as exc: + print(exc) + sys.exit(1) + except requests.exceptions.RequestException as err: + print("Login connection failed:", err) + sys.exit(1) + + if not login_reply.status_code == 200: + print("Login failed.") + if login_reply.status_code == 403: + print("Incorrect username or password/token (403)") + elif login_reply.status_code == 404: + print("Incorrect login URL (404)") + else: + print("Status code:", login_reply.status_code) + sys.exit(1) + + submit_url = get_url(cfg, "submissionurl", "submit") + + if not args.force: + confirm_or_die(problem, language, files, mainclass, tag) + + try: + result = submit( + submit_url, + login_reply.cookies, + problem, + language, + files, + mainclass, + tag, + args.assignment, + args.contest, + ) + except requests.exceptions.RequestException as err: + print("Submit connection failed:", err) + sys.exit(1) + + if result.status_code != 200: + print("Submission failed.") + if result.status_code == 403: + print("Access denied (403)") + elif result.status_code == 404: + print("Incorrect submit URL (404)") + else: + print("Status code:", result.status_code) + sys.exit(1) + + plain_result = result.content.decode("utf-8").replace("
", "\n") + print(plain_result) + + submission_url = None + try: + submission_url = get_submission_url(plain_result, cfg) + except configparser.NoOptionError: + pass + + if submission_url: + _, score, testcases_done, testcases_correct, testcases_total = show_judgement( + submission_url, cfg + ) + print(f"Score: {score}") + print(f"Test cases done: {testcases_done}") + print(f"Test cases correct: {testcases_correct}") + print(f"Test cases total: {testcases_total}") + + +if __name__ == "__main__": + main() diff --git a/openevolve/controller.py b/openevolve/controller.py index 8100277ed..3562a2b21 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -13,7 +13,7 @@ from openevolve.config import Config, load_config from openevolve.database import Program, ProgramDatabase -from openevolve.evaluator import Evaluator +from openevolve.evaluator import Evaluator, EvaluationObject from openevolve.llm.ensemble import LLMEnsemble from openevolve.prompt.sampler import PromptSampler from openevolve.process_parallel import ProcessParallelController @@ -74,6 +74,7 @@ def __init__( self, initial_program_path: str, evaluation_file: str, + evaluation_object: Optional[EvaluationObject] = None, config_path: Optional[str] = None, config: Optional[Config] = None, output_dir: Optional[str] = None, @@ -154,12 +155,14 @@ def __init__( self.evaluator = Evaluator( self.config.evaluator, evaluation_file, + evaluation_object, self.llm_evaluator_ensemble, self.evaluator_prompt_sampler, database=self.database, suffix=Path(self.initial_program_path).suffix, ) self.evaluation_file = evaluation_file + self.evaluation_object = self.evaluator.evaluation_object logger.info(f"Initialized OpenEvolve with {initial_program_path}") @@ -276,7 +279,7 @@ async def run( # Initialize improved parallel processing try: self.parallel_controller = ProcessParallelController( - self.config, self.evaluation_file, self.database + self.config, self.evaluation_file, self.evaluation_object, self.database ) # Set up signal handlers for graceful shutdown diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py index 0e25470b0..504776412 100644 --- a/openevolve/evaluator.py +++ b/openevolve/evaluator.py @@ -13,8 +13,9 @@ import time import traceback import uuid +import warnings from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union, Protocol, cast import traceback from openevolve.config import EvaluatorConfig @@ -29,6 +30,22 @@ logger = logging.getLogger(__name__) +class EvaluationObject(Protocol): + def evaluate(self, program_path: str) -> EvaluationResult: + ... + + +class CascadeEvaluationObject(Protocol): + def evaluate_stage1(self, program_path: str) -> EvaluationResult: + ... + + def evaluate_stage2(self, program_path: str) -> EvaluationResult: + ... + + def evaluate_stage3(self, program_path: str) -> EvaluationResult: + ... 
+ + class Evaluator: """ Evaluates programs and assigns scores @@ -41,14 +58,18 @@ def __init__( self, config: EvaluatorConfig, evaluation_file: str, + evaluation_object: Optional[EvaluationObject] = None, llm_ensemble: Optional[LLMEnsemble] = None, prompt_sampler: Optional[PromptSampler] = None, database: Optional[ProgramDatabase] = None, suffix: Optional[str]=".py", ): + if evaluation_file and evaluation_object: + warnings.warn("Both evaluation_file and evaluation_object provided - evaluation_object overrides evaluation_file") self.config = config self.evaluation_file = evaluation_file self.program_suffix = suffix + self.evaluation_object = evaluation_object self.llm_ensemble = llm_ensemble self.prompt_sampler = prompt_sampler self.database = database @@ -56,8 +77,9 @@ def __init__( # Create a task pool for parallel evaluation self.task_pool = TaskPool(max_concurrency=config.parallel_evaluations) - # Set up evaluation function if file exists - self._load_evaluation_function() + if self.evaluation_object is None: + # Set up evaluation module if file exists + self._load_evaluation_function() # Pending artifacts storage for programs self._pending_artifacts: Dict[str, Dict[str, Union[str, bytes]]] = {} @@ -89,7 +111,7 @@ def _load_evaluation_function(self) -> None: f"Evaluation file {self.evaluation_file} does not contain an 'evaluate' function" ) - self.evaluate_function = module.evaluate + self.evaluation_object = module logger.info(f"Successfully loaded evaluation function from {self.evaluation_file}") # Validate cascade configuration @@ -348,7 +370,7 @@ async def _direct_evaluate( # Create a coroutine that runs the evaluation function in an executor async def run_evaluation(): loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, self.evaluate_function, program_path) + return await loop.run_in_executor(None, self.evaluation_object.evaluate, program_path) # Run the evaluation with timeout - let exceptions bubble up for retry handling result = await asyncio.wait_for(run_evaluation(), timeout=self.config.timeout) @@ -369,23 +391,11 @@ async def _cascade_evaluate( Returns: Dictionary of metrics or EvaluationResult with metrics and artifacts """ - # Import the evaluation module to get cascade functions if they exist + # This cast just makes static type checkers happy; actual checking is still done using hasattr + evaluation_object = cast(CascadeEvaluationObject, self.evaluation_object) try: - # Add the evaluation file's directory to Python path so it can import local modules - eval_dir = os.path.dirname(os.path.abspath(self.evaluation_file)) - if eval_dir not in sys.path: - sys.path.insert(0, eval_dir) - logger.debug(f"Added {eval_dir} to Python path for cascade evaluation") - - spec = importlib.util.spec_from_file_location("evaluation_module", self.evaluation_file) - if spec is None or spec.loader is None: - return await self._direct_evaluate(program_path) - - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - # Check if cascade functions exist - if not hasattr(module, "evaluate_stage1"): + if not hasattr(evaluation_object, "evaluate_stage1"): return await self._direct_evaluate(program_path) # Run first stage with timeout @@ -393,7 +403,7 @@ async def _cascade_evaluate( async def run_stage1(): loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, module.evaluate_stage1, program_path) + return await loop.run_in_executor(None, evaluation_object.evaluate_stage1, program_path) stage1_result = await 
asyncio.wait_for(run_stage1(), timeout=self.config.timeout) stage1_eval_result = self._process_evaluation_result(stage1_result) @@ -426,7 +436,7 @@ async def run_stage1(): return stage1_eval_result # Check if second stage exists - if not hasattr(module, "evaluate_stage2"): + if not hasattr(evaluation_object, "evaluate_stage2"): return stage1_eval_result # Run second stage with timeout @@ -434,7 +444,7 @@ async def run_stage1(): async def run_stage2(): loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, module.evaluate_stage2, program_path) + return await loop.run_in_executor(None, evaluation_object.evaluate_stage2, program_path) stage2_result = await asyncio.wait_for(run_stage2(), timeout=self.config.timeout) stage2_eval_result = self._process_evaluation_result(stage2_result) @@ -488,7 +498,7 @@ async def run_stage2(): return merged_result # Check if third stage exists - if not hasattr(module, "evaluate_stage3"): + if not hasattr(evaluation_object, "evaluate_stage3"): return merged_result # Run third stage with timeout @@ -496,7 +506,7 @@ async def run_stage2(): async def run_stage3(): loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, module.evaluate_stage3, program_path) + return await loop.run_in_executor(None, evaluation_object.evaluate_stage3, program_path) stage3_result = await asyncio.wait_for(run_stage3(), timeout=self.config.timeout) stage3_eval_result = self._process_evaluation_result(stage3_result) diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py index d2df82f6c..84fb4285e 100644 --- a/openevolve/process_parallel.py +++ b/openevolve/process_parallel.py @@ -15,6 +15,7 @@ from openevolve.config import Config from openevolve.database import Program, ProgramDatabase +from openevolve.evaluator import EvaluationObject logger = logging.getLogger(__name__) @@ -33,10 +34,11 @@ class SerializableResult: error: Optional[str] = None -def _worker_init(config_dict: dict, evaluation_file: str) -> None: +def _worker_init(config_dict: dict, evaluation_file: str, evaluation_object: EvaluationObject) -> None: """Initialize worker process with necessary components""" global _worker_config global _worker_evaluation_file + global _worker_evaluation_object global _worker_evaluator global _worker_llm_ensemble global _worker_prompt_sampler @@ -79,6 +81,7 @@ def _worker_init(config_dict: dict, evaluation_file: str) -> None: }, ) _worker_evaluation_file = evaluation_file + _worker_evaluation_object = evaluation_object # These will be lazily initialized on first use _worker_evaluator = None @@ -115,6 +118,7 @@ def _lazy_init_worker_components(): _worker_evaluator = Evaluator( _worker_config.evaluator, _worker_evaluation_file, + _worker_evaluation_object, evaluator_llm, evaluator_prompt, database=None, # No shared database in worker @@ -258,9 +262,10 @@ def _run_iteration_worker( class ProcessParallelController: """Controller for process-based parallel evolution""" - def __init__(self, config: Config, evaluation_file: str, database: ProgramDatabase): + def __init__(self, config: Config, evaluation_file: str, evaluation_object: Optional[EvaluationObject], database: ProgramDatabase): self.config = config self.evaluation_file = evaluation_file + self.evaluation_object = evaluation_object self.database = database self.executor: Optional[ProcessPoolExecutor] = None @@ -310,7 +315,7 @@ def start(self) -> None: self.executor = ProcessPoolExecutor( max_workers=self.num_workers, initializer=_worker_init, - initargs=(config_dict, 
self.evaluation_file), + initargs=(config_dict, self.evaluation_file, self.evaluation_object), ) logger.info(f"Started process pool with {self.num_workers} processes") @@ -514,16 +519,11 @@ async def run_evolution( # Check target score if target_score is not None and child_program.metrics: - numeric_metrics = [ - v for v in child_program.metrics.values() if isinstance(v, (int, float)) - ] - if numeric_metrics: - avg_score = sum(numeric_metrics) / len(numeric_metrics) - if avg_score >= target_score: - logger.info( - f"Target score {target_score} reached at iteration {completed_iteration}" - ) - break + if child_program.metrics["combined_score"] >= target_score: + logger.info( + f"Target score {target_score} reached at iteration {completed_iteration}" + ) + break except Exception as e: logger.error(f"Error processing result from iteration {completed_iteration}: {e}") diff --git a/tests/test_checkpoint_resume.py b/tests/test_checkpoint_resume.py index 0320ae289..dcc5fce7a 100644 --- a/tests/test_checkpoint_resume.py +++ b/tests/test_checkpoint_resume.py @@ -21,6 +21,8 @@ class MockEvaluator: """Mock evaluator for testing""" + evaluation_object = None + def __init__(self): self.call_count = 0 diff --git a/tests/test_process_parallel.py b/tests/test_process_parallel.py index f7965a2e4..821eecf44 100644 --- a/tests/test_process_parallel.py +++ b/tests/test_process_parallel.py @@ -64,7 +64,7 @@ def tearDown(self): def test_controller_initialization(self): """Test that controller initializes correctly""" - controller = ProcessParallelController(self.config, self.eval_file, self.database) + controller = ProcessParallelController(self.config, self.eval_file, None, self.database) self.assertEqual(controller.num_workers, 2) self.assertIsNone(controller.executor) @@ -72,7 +72,7 @@ def test_controller_initialization(self): def test_controller_start_stop(self): """Test starting and stopping the controller""" - controller = ProcessParallelController(self.config, self.eval_file, self.database) + controller = ProcessParallelController(self.config, self.eval_file, None, self.database) # Start controller controller.start() @@ -85,7 +85,7 @@ def test_controller_start_stop(self): def test_database_snapshot_creation(self): """Test creating database snapshot for workers""" - controller = ProcessParallelController(self.config, self.eval_file, self.database) + controller = ProcessParallelController(self.config, self.eval_file, None, self.database) snapshot = controller._create_database_snapshot() @@ -106,7 +106,7 @@ def test_run_evolution_basic(self): """Test basic evolution run""" async def run_test(): - controller = ProcessParallelController(self.config, self.eval_file, self.database) + controller = ProcessParallelController(self.config, self.eval_file, None, self.database) # Mock the executor to avoid actually spawning processes with patch.object(controller, "_submit_iteration") as mock_submit: @@ -152,7 +152,7 @@ async def run_test(): def test_request_shutdown(self): """Test graceful shutdown request""" - controller = ProcessParallelController(self.config, self.eval_file, self.database) + controller = ProcessParallelController(self.config, self.eval_file, None, self.database) # Request shutdown controller.request_shutdown()
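Note on usage (not part of the diff): the new `evaluation_object` parameter accepts any object whose `evaluate(program_path)` method returns a metrics dictionary (or `EvaluationResult`), so evaluation logic can carry state such as CLI arguments or credentials instead of living in a module-level `evaluate()` function. The sketch below illustrates the protocol; the class name, the test data, and the argument-free `evolve.run()` call are illustrative assumptions rather than APIs confirmed by this PR.

```python
# Sketch of a stateful evaluation object satisfying the EvaluationObject protocol.
# Hypothetical example: score a candidate program against one fixed input/output pair.
import asyncio
import subprocess

from openevolve import OpenEvolve


class SubprocessEvaluationObject:
    """Runs the candidate program on a fixed input and checks its output."""

    def __init__(self, test_input: str, expected_output: str, timeout_seconds: int = 10):
        self.test_input = test_input
        self.expected_output = expected_output
        self.timeout_seconds = timeout_seconds

    def evaluate(self, program_path: str) -> dict:
        try:
            proc = subprocess.run(
                ["python", program_path],
                input=self.test_input,
                capture_output=True,
                text=True,
                timeout=self.timeout_seconds,
            )
            correct = float(proc.stdout.strip() == self.expected_output)
        except Exception:
            correct = 0.0
        # "combined_score" is the key the revised target-score check reads.
        return {"correct": correct, "combined_score": correct}


if __name__ == "__main__":
    eval_obj = SubprocessEvaluationObject("aiemckgobjfndlhp\n", "20")
    # Positional arguments mirror main.py in this example:
    # (initial_program_path, evaluation_file, evaluation_object, config_path).
    # Passing "" as evaluation_file avoids the "both provided" warning in Evaluator.__init__.
    evolve = OpenEvolve("initial_program.py", "", eval_obj, "config.yaml")
    asyncio.run(evolve.run())  # assumes run() can be called with default arguments
```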
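Cascade evaluation follows the same pattern: the refactored `_cascade_evaluate` probes the evaluation object with `hasattr`, so `evaluate_stage1` is the only required stage method and `evaluate_stage2`/`evaluate_stage3` are optional, with stage progression gated by the evaluator config (`cascade_evaluation`, `cascade_thresholds`), which this example leaves disabled. Below is a hedged sketch of an object satisfying the `CascadeEvaluationObject` protocol; the class and its staging logic are made up for illustration.

```python
# Hypothetical two-stage evaluation object: a cheap syntax gate, then full scoring.
class CascadeEvaluationExample:
    def evaluate_stage1(self, program_path: str) -> dict:
        # Stage 1: reject candidates that do not even compile.
        try:
            with open(program_path) as f:
                compile(f.read(), program_path, "exec")
            return {"compiles": 1.0, "combined_score": 0.1}
        except SyntaxError:
            return {"compiles": 0.0, "combined_score": 0.0}

    def evaluate_stage2(self, program_path: str) -> dict:
        # Stage 2: expensive scoring (e.g. the Kattis submission above) would go here.
        return {"combined_score": 1.0}

    # A plain evaluate() keeps the object usable when cascade_evaluation is false.
    def evaluate(self, program_path: str) -> dict:
        return self.evaluate_stage2(program_path)
```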