diff --git a/examples/rust_adaptive_sort/config.yaml b/examples/rust_adaptive_sort/config.yaml
index 2d48e94d..f69f8c96 100644
--- a/examples/rust_adaptive_sort/config.yaml
+++ b/examples/rust_adaptive_sort/config.yaml
@@ -8,14 +8,14 @@ file_suffix: ".rs"
 
 # LLM configuration
 llm:
-  primary_model: "gemini-2.5-flash-lite-preview-06-17"
+  primary_model: "gemini-flash-lite-latest"
   primary_model_weight: 0.8
-  secondary_model: "gemini-2.5-flash"
+  secondary_model: "gemini-flash-latest"
   secondary_model_weight: 0.2
   api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
   
   temperature: 0.7
-  max_tokens: 4096
+  max_tokens: 16384
   
   # Custom system message for Rust performance programming
   system_message: |
diff --git a/examples/rust_adaptive_sort/evaluator.py b/examples/rust_adaptive_sort/evaluator.py
index 65f4ae29..f8ff8b5f 100644
--- a/examples/rust_adaptive_sort/evaluator.py
+++ b/examples/rust_adaptive_sort/evaluator.py
@@ -4,19 +4,31 @@
 
 import asyncio
 import json
-import os
 import subprocess
 import tempfile
-import time
 from pathlib import Path
-from typing import Dict, Any, List
+from openevolve.evaluation_result import EvaluationResult
+import logging
+import os
 
-import numpy as np
+THIS_FILE_DIR = Path(os.path.dirname(os.path.realpath(__file__)))  # directory containing this file
+
+logger = logging.getLogger("examples.rust_adaptive_sort.evaluator")  # used by evaluate() to report failures
 
-from openevolve.evaluation_result import EvaluationResult
 
 
-async def evaluate(program_path: str) -> EvaluationResult:
+def evaluate(program_path: str) -> EvaluationResult:
+    result = asyncio.run(_evaluate(program_path))
+    if "error" in result.artifacts:
+        logger.error(f"Error evaluating program: {result.artifacts['error']}")
+        if "stderr" in result.artifacts:
+            logger.error(f"Stderr: {result.artifacts['stderr']}")
+        if "stdout" in result.artifacts:
+            logger.error(f"Stdout: {result.artifacts['stdout']}")
+    return result
+
+
+async def _evaluate(program_path: str) -> EvaluationResult:
     """
     Evaluate a Rust sorting algorithm implementation.
 
@@ -41,7 +53,10 @@ async def evaluate(program_path: str) -> EvaluationResult:
             if result.returncode != 0:
                 return EvaluationResult(
                     metrics={"score": 0.0, "compile_success": 0.0},
-                    artifacts={"error": "Failed to create Cargo project", "stderr": result.stderr},
+                    artifacts={
+                        "error": "Failed to create Cargo project",
+                        "stderr": result.stderr,
+                    },
                 )
 
             # Copy the program to src/lib.rs
@@ -52,127 +67,28 @@ async def evaluate(program_path: str) -> EvaluationResult:
                 dst.write(lib_content)
 
             # Create main.rs with benchmark code
-            main_content = """
-use sort_test::{adaptive_sort, run_benchmark};
-use std::time::Instant;
-
-fn main() {
-    // Generate test datasets with different characteristics
-    let test_data = vec![
-        // Random data
-        generate_random_data(1000),
-        generate_random_data(10000),
-        
-        // Nearly sorted data
-        generate_nearly_sorted_data(1000, 0.05),
-        generate_nearly_sorted_data(10000, 0.05),
-        
-        // Reverse sorted data
-        generate_reverse_sorted_data(1000),
-        generate_reverse_sorted_data(10000),
-        
-        // Data with many duplicates
-        generate_data_with_duplicates(1000, 10),
-        generate_data_with_duplicates(10000, 100),
-        
-        // Partially sorted data
-        generate_partially_sorted_data(1000, 0.3),
-        generate_partially_sorted_data(10000, 0.3),
-    ];
-    
-    let results = run_benchmark(test_data);
-    
-    // Calculate metrics
-    let all_correct = results.correctness.iter().all(|&c| c);
-    let correctness_score = if all_correct { 1.0 } else { 0.0 };
-    
-    let avg_time: f64 = results.times.iter().sum::<f64>() / results.times.len() as f64;
-    
-    // Performance score (normalized, assuming baseline of 0.1 seconds for largest dataset)
-    let performance_score = 1.0 / (1.0 + avg_time * 10.0);
-    
-    // Output results as JSON
-    println!("{{");
-    println!("  \\"correctness\\": {},", correctness_score);
-    println!("  \\"avg_time\\": {},", avg_time);
-    println!("  \\"performance_score\\": {},", performance_score);
-    println!("  \\"adaptability_score\\": {},", results.adaptability_score);
-    println!("  \\"times\\": {:?},", results.times);
-    println!("  \\"all_correct\\": {}", all_correct);
-    println!("}}");
-}
-
-fn generate_random_data(size: usize) -> Vec<i32> {
-    (0..size).map(|_| rand::random::<i32>() % 10000).collect()
-}
-
-fn generate_nearly_sorted_data(size: usize, disorder_rate: f64) -> Vec<i32> {
-    let mut data: Vec<i32> = (0..size as i32).collect();
-    let swaps = (size as f64 * disorder_rate) as usize;
-    
-    for _ in 0..swaps {
-        let i = rand::random::<usize>() % size;
-        let j = rand::random::<usize>() % size;
-        data.swap(i, j);
-    }
-    
-    data
-}
-
-fn generate_reverse_sorted_data(size: usize) -> Vec<i32> {
-    (0..size as i32).rev().collect()
-}
-
-fn generate_data_with_duplicates(size: usize, unique_values: usize) -> Vec<i32> {
-    (0..size).map(|_| rand::random::<i32>() % unique_values as i32).collect()
-}
-
-fn generate_partially_sorted_data(size: usize, sorted_fraction: f64) -> Vec<i32> {
-    let sorted_size = (size as f64 * sorted_fraction) as usize;
-    let mut data = Vec::with_capacity(size);
-    
-    // Add sorted portion
-    data.extend((0..sorted_size as i32));
-    
-    // Add random portion
-    data.extend((0..(size - sorted_size)).map(|_| rand::random::<i32>() % 10000));
-    
-    data
-}
-
-// Simple random implementation
-mod rand {
-    use std::cell::Cell;
-    use std::time::{SystemTime, UNIX_EPOCH};
-    
-    thread_local! {
-        static SEED: Cell<u64> = Cell::new(
-            SystemTime::now()
-                .duration_since(UNIX_EPOCH)
-                .unwrap()
-                .as_secs()
-        );
-    }
-    
-    pub fn random<T>() -> T
-    where
-        T: From<u64>,
-    {
-        SEED.with(|seed| {
-            let mut x = seed.get();
-            x ^= x << 13;
-            x ^= x >> 7;
-            x ^= x << 17;
-            seed.set(x);
-            T::from(x)
-        })
-    }
-}
-"""
+            project_source_dir = THIS_FILE_DIR / "sort_test"
+            main_file_source = project_source_dir / "src" / "main.rs"
+            with open(main_file_source, "r") as f:
+                main_content = f.read()
             main_path = project_dir / "src" / "main.rs"
             with open(main_path, "w") as f:
                 f.write(main_content)
 
+            cargo_toml_source = project_source_dir / "Cargo.toml"
+            with open(cargo_toml_source, "r") as f:
+                cargo_toml_content = f.read()
+            cargo_toml_path = project_dir / "Cargo.toml"
+            with open(cargo_toml_path, "w") as f:
+                f.write(cargo_toml_content)
+
+            cargo_lock_source = project_source_dir / "Cargo.lock"
+            with open(cargo_lock_source, "r") as f:
+                cargo_lock_content = f.read()
+            cargo_lock_path = project_dir / "Cargo.lock"
+            with open(cargo_lock_path, "w") as f:
+                f.write(cargo_lock_content)
+
             # Build the project
             build_result = subprocess.run(
                 ["cargo", "build", "--release"],
@@ -305,7 +221,7 @@ async def evaluate(program_path: str) -> EvaluationResult:
     import sys
 
     if len(sys.argv) > 1:
-        result = asyncio.run(evaluate(sys.argv[1]))
+        result = evaluate(sys.argv[1])
         print(f"Score: {result.metrics['score']:.4f}")
         print(f"Correctness: {result.metrics['correctness']:.4f}")
         print(f"Performance: {result.metrics['performance_score']:.4f}")
diff --git a/examples/rust_adaptive_sort/sort_test/.gitignore b/examples/rust_adaptive_sort/sort_test/.gitignore
new file mode 100644
index 00000000..9f970225
--- /dev/null
+++ b/examples/rust_adaptive_sort/sort_test/.gitignore
@@ -0,0 +1 @@
+target/
\ No newline at end of file
diff --git a/examples/rust_adaptive_sort/sort_test/Cargo.lock b/examples/rust_adaptive_sort/sort_test/Cargo.lock
new file mode 100644
index 00000000..9e84a6ad
--- /dev/null
+++ b/examples/rust_adaptive_sort/sort_test/Cargo.lock
@@ -0,0 +1,148 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.177"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e0f6df8eaa422d97d72edcd152e1451618fed47fabbdbd5a8864167b1d4aff7"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.41"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "rand"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "sort_test"
+version = "0.1.0"
+dependencies = [
+ "rand",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.108"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
+
+[[package]]
+name = "wasip2"
+version = "1.0.1+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.46.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
+
+[[package]]
+name = "zerocopy"
+version = "0.8.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
diff --git a/examples/rust_adaptive_sort/sort_test/Cargo.toml b/examples/rust_adaptive_sort/sort_test/Cargo.toml
new file mode 100644
index 00000000..743fcf0e
--- /dev/null
+++ b/examples/rust_adaptive_sort/sort_test/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "sort_test"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+rand = "0.9.2"
diff --git a/examples/rust_adaptive_sort/sort_test/src/lib.rs b/examples/rust_adaptive_sort/sort_test/src/lib.rs
new file mode 100644
index 00000000..e172d78e
--- /dev/null
+++ b/examples/rust_adaptive_sort/sort_test/src/lib.rs
@@ -0,0 +1,156 @@
+// Adaptive Sorting Algorithm Implementation from initial_program.rs
+// This program implements a sorting algorithm that can be evolved to adapt to different data patterns
+
+use std::cmp::Ordering;
+
+// EVOLVE-BLOCK-START
+// Initial implementation: Simple quicksort
+// This can be evolved to:
+// - Hybrid algorithms (introsort, timsort-like)
+// - Adaptive pivot selection
+// - Special handling for nearly sorted data
+// - Switching to different algorithms based on data characteristics
+
+/// Sorts `arr` in place in ascending order; currently a plain quicksort over the whole slice.
+pub fn adaptive_sort<T: Ord + Clone>(arr: &mut [T]) {
+    let len = arr.len();
+    // Length 0 or 1 is already sorted; the guard also keeps `len - 1` from underflowing.
+    if len > 1 {
+        quicksort(arr, 0, len - 1);
+    }
+}
+
+/// Sorts the inclusive range `low..=high` of `arr`; fewer than two elements is a no-op.
+fn quicksort<T: Ord + Clone>(arr: &mut [T], low: usize, high: usize) {
+    if low >= high {
+        return;
+    }
+    let split = partition(arr, low, high);
+    if split > 0 {
+        quicksort(arr, low, split - 1); // guarded: `split - 1` would underflow at index 0
+    }
+    quicksort(arr, split + 1, high);
+}
+
+/// Partitions `arr[low..=high]` around its last element and returns the pivot's final index.
+fn partition<T: Ord + Clone>(arr: &mut [T], low: usize, high: usize) -> usize {
+    let pivot = arr[high].clone();
+    // `boundary` is the first slot not yet known to hold a value <= pivot.
+    let mut boundary = low;
+    for scan in low..high {
+        if pivot >= arr[scan] {
+            arr.swap(boundary, scan);
+            boundary += 1;
+        }
+    }
+    // Place the pivot between the "<= pivot" group and the "> pivot" group.
+    arr.swap(boundary, high);
+    boundary
+}
+
+/// Returns `true` when the number of inverted pairs in `arr` does not exceed `threshold`
+/// times the maximum possible pair count `n * (n - 1) / 2`; checks pairs with a nested loop.
+fn is_nearly_sorted<T: Ord>(arr: &[T], threshold: f64) -> bool {
+    let n = arr.len();
+    if n <= 1 {
+        return true;
+    }
+    let budget = ((n * (n - 1)) / 2) as f64 * threshold;
+    let mut seen = 0;
+    for (i, left) in arr.iter().enumerate() {
+        for right in &arr[i + 1..] {
+            if left > right {
+                seen += 1;
+                // Stop early: once over budget the answer cannot change.
+                if seen as f64 > budget {
+                    return false;
+                }
+            }
+        }
+    }
+    true
+}
+
+/// In-place insertion sort: each element is swapped leftwards past every greater neighbour.
+fn insertion_sort<T: Ord>(arr: &mut [T]) {
+    for start in 1..arr.len() {
+        let mut pos = start;
+        while pos > 0 && arr[pos] < arr[pos - 1] {
+            arr.swap(pos - 1, pos);
+            pos -= 1;
+        }
+    }
+}
+// EVOLVE-BLOCK-END
+
+/// Times `adaptive_sort` on every dataset and records per-dataset duration and correctness.
+///
+/// A run counts as correct only if the output equals the input as sorted by the standard
+/// library, so a result that is ordered but drops or invents elements is rejected.
+/// `adaptability_score` is `1 / (1 + std-dev of times)`, or 0.0 for fewer than two datasets.
+pub fn run_benchmark(test_data: Vec<Vec<i32>>) -> BenchmarkResults {
+    let mut results = BenchmarkResults {
+        times: Vec::with_capacity(test_data.len()),
+        correctness: Vec::with_capacity(test_data.len()),
+        adaptability_score: 0.0,
+    };
+
+    for mut arr in test_data {
+        // Copy of the input, kept for the reference sort below.
+        let mut expected = arr.clone();
+        let start = std::time::Instant::now();
+        adaptive_sort(&mut arr);
+        results.times.push(start.elapsed().as_secs_f64());
+
+        // Being ordered is not enough: the output must also be a permutation of the input.
+        expected.sort_unstable();
+        results.correctness.push(arr == expected);
+    }
+
+    // Adaptability: the smaller the spread of timings across datasets, the higher the score.
+    let count = results.times.len();
+    if count > 1 {
+        let mean = results.times.iter().sum::<f64>() / count as f64;
+        let variance = results
+            .times
+            .iter()
+            .map(|t| (t - mean).powi(2))
+            .sum::<f64>()
+            / count as f64;
+        results.adaptability_score = 1.0 / (1.0 + variance.sqrt());
+    }
+    results
+}
+
+#[derive(Debug)]
+pub struct BenchmarkResults {
+    pub times: Vec<f64>, // seconds taken per dataset, in input order
+    pub correctness: Vec<bool>, // per dataset: did the output pass the benchmark's check?
+    pub adaptability_score: f64, // 1 / (1 + std-dev of `times`); 0.0 when not computed
+}
+
+#[cfg(test)]
+mod tests {
+    use super::adaptive_sort;
+
+    #[test]
+    fn test_basic_sort() {
+        let mut values = [3, 1, 4, 1, 5, 9, 2, 6];
+        adaptive_sort(&mut values);
+        assert_eq!(values, [1, 1, 2, 3, 4, 5, 6, 9]);
+    }
+
+    #[test]
+    fn test_empty_array() {
+        let mut values: [i32; 0] = [];
+        adaptive_sort(&mut values);
+        assert!(values.is_empty());
+    }
+
+    #[test]
+    fn test_single_element() {
+        let mut values = [42];
+        adaptive_sort(&mut values);
+        assert_eq!(values, [42]);
+    }
+}
diff --git a/examples/rust_adaptive_sort/sort_test/src/main.rs b/examples/rust_adaptive_sort/sort_test/src/main.rs
new file mode 100644
index 00000000..83487f44
--- /dev/null
+++ b/examples/rust_adaptive_sort/sort_test/src/main.rs
@@ -0,0 +1,88 @@
+use rand::prelude::*;
+use sort_test::{adaptive_sort, run_benchmark};
+use std::time::Instant;
+
+/// Builds the benchmark datasets, runs the sort over them and prints the metrics as JSON.
+fn main() {
+    let datasets = vec![
+        // Random values
+        generate_random_data(1000),
+        generate_random_data(10000),
+        // Ascending run disturbed by random swaps
+        generate_nearly_sorted_data(1000, 0.05),
+        generate_nearly_sorted_data(10000, 0.05),
+        // Descending run
+        generate_reverse_sorted_data(1000),
+        generate_reverse_sorted_data(10000),
+        // Few distinct values
+        generate_data_with_duplicates(1000, 10),
+        generate_data_with_duplicates(10000, 100),
+        // Ascending prefix followed by random values
+        generate_partially_sorted_data(1000, 0.3),
+        generate_partially_sorted_data(10000, 0.3),
+    ];
+
+    let report = run_benchmark(datasets);
+    let times = &report.times;
+
+    // Correctness is all-or-nothing: one failing dataset zeroes the score.
+    let all_correct = !report.correctness.contains(&false);
+    let correctness_score = f64::from(u8::from(all_correct));
+
+    // Mean time mapped into (0, 1]: an average of 0.1 s yields 0.5.
+    let avg_time = times.iter().sum::<f64>() / times.len() as f64;
+    let performance_score = 1.0 / (1.0 + avg_time * 10.0);
+
+    // Hand-formatted JSON object on stdout.
+    println!("{{");
+    println!("  \"correctness\": {correctness_score},");
+    println!("  \"avg_time\": {avg_time},");
+    println!("  \"performance_score\": {performance_score},");
+    println!("  \"adaptability_score\": {},", report.adaptability_score);
+    println!("  \"times\": {times:?},");
+    println!("  \"all_correct\": {all_correct}");
+    println!("}}");
+}
+
+/// Returns `size` values, each a random `i32` reduced modulo 10000 (the sign is kept).
+fn generate_random_data(size: usize) -> Vec<i32> {
+    (0..size).map(|_| rand::random::<i32>() % 10000).collect()
+}
+
+/// Returns the ascending sequence `0..size` after `size * disorder_rate` (truncated) random swaps.
+fn generate_nearly_sorted_data(size: usize, disorder_rate: f64) -> Vec<i32> {
+    let mut rng = rand::rng();
+    let mut values = Vec::from_iter(0..size as i32);
+    let swap_count = (size as f64 * disorder_rate) as usize;
+    for _ in 0..swap_count {
+        let first = rng.random::<u64>() as usize % size;
+        let second = rng.random::<u64>() as usize % size;
+        values.swap(first, second);
+    }
+    values
+}
+
+fn generate_reverse_sorted_data(size: usize) -> Vec<i32> {
+    (1..=size as i32).rev().map(|value| value - 1).collect() // size - 1 down to 0
+}
+
+/// Returns `size` values drawn uniformly from `0..unique_values`, so the data holds at most
+/// `unique_values` distinct values. Panics if `unique_values` is 0 (empty range).
+fn generate_data_with_duplicates(size: usize, unique_values: usize) -> Vec<i32> {
+    let mut rng = rand::rng();
+    (0..size).map(|_| rng.random_range(0..unique_values as i32)).collect()
+}
+
+/// Returns `size` values: an ascending prefix `0..k` with `k = size * sorted_fraction`
+/// (truncated, capped at `size`), followed by random `i32` values reduced modulo 10000.
+fn generate_partially_sorted_data(size: usize, sorted_fraction: f64) -> Vec<i32> {
+    let mut rng = rand::rng();
+    // Cap the prefix so a fraction above 1.0 cannot underflow `size - sorted_size` below.
+    let sorted_size = ((size as f64 * sorted_fraction) as usize).min(size);
+    let mut data = Vec::with_capacity(size);
+
+    data.extend(0..sorted_size as i32);
+    data.extend((0..size - sorted_size).map(|_| rng.random::<i32>() % 10000));
+
+    data
+}