diff --git a/examples/rust_adaptive_sort/config.yaml b/examples/rust_adaptive_sort/config.yaml index 2d48e94d..f69f8c96 100644 --- a/examples/rust_adaptive_sort/config.yaml +++ b/examples/rust_adaptive_sort/config.yaml @@ -8,14 +8,14 @@ file_suffix: ".rs" # LLM configuration llm: - primary_model: "gemini-2.5-flash-lite-preview-06-17" + primary_model: "gemini-flash-lite-latest" primary_model_weight: 0.8 - secondary_model: "gemini-2.5-flash" + secondary_model: "gemini-flash-latest" secondary_model_weight: 0.2 api_base: "https://generativelanguage.googleapis.com/v1beta/openai/" temperature: 0.7 - max_tokens: 4096 + max_tokens: 16384 # Custom system message for Rust performance programming system_message: | diff --git a/examples/rust_adaptive_sort/evaluator.py b/examples/rust_adaptive_sort/evaluator.py index 65f4ae29..f8ff8b5f 100644 --- a/examples/rust_adaptive_sort/evaluator.py +++ b/examples/rust_adaptive_sort/evaluator.py @@ -4,19 +4,31 @@ import asyncio import json -import os import subprocess import tempfile -import time from pathlib import Path -from typing import Dict, Any, List +from openevolve.evaluation_result import EvaluationResult +import logging +import os -import numpy as np +THIS_FILE_DIR = Path(os.path.dirname(os.path.realpath(__file__))) + +logger = logging.getLogger("examples.rust_adaptive_sort.evaluator") -from openevolve.evaluation_result import EvaluationResult -async def evaluate(program_path: str) -> EvaluationResult: +def evaluate(program_path: str) -> EvaluationResult: + result = asyncio.run(_evaluate(program_path)) + if "error" in result.artifacts: + logger.error(f"Error evaluating program: {result.artifacts['error']}") + if "stderr" in result.artifacts: + logger.error(f"Stderr: {result.artifacts['stderr']}") + if "stdout" in result.artifacts: + logger.error(f"Stdout: {result.artifacts['stdout']}") + return result + + +async def _evaluate(program_path: str) -> EvaluationResult: """ Evaluate a Rust sorting algorithm implementation. @@ -41,7 +53,10 @@ async def evaluate(program_path: str) -> EvaluationResult: if result.returncode != 0: return EvaluationResult( metrics={"score": 0.0, "compile_success": 0.0}, - artifacts={"error": "Failed to create Cargo project", "stderr": result.stderr}, + artifacts={ + "error": "Failed to create Cargo project", + "stderr": result.stderr, + }, ) # Copy the program to src/lib.rs @@ -52,127 +67,28 @@ async def evaluate(program_path: str) -> EvaluationResult: dst.write(lib_content) # Create main.rs with benchmark code - main_content = """ -use sort_test::{adaptive_sort, run_benchmark}; -use std::time::Instant; - -fn main() { - // Generate test datasets with different characteristics - let test_data = vec![ - // Random data - generate_random_data(1000), - generate_random_data(10000), - - // Nearly sorted data - generate_nearly_sorted_data(1000, 0.05), - generate_nearly_sorted_data(10000, 0.05), - - // Reverse sorted data - generate_reverse_sorted_data(1000), - generate_reverse_sorted_data(10000), - - // Data with many duplicates - generate_data_with_duplicates(1000, 10), - generate_data_with_duplicates(10000, 100), - - // Partially sorted data - generate_partially_sorted_data(1000, 0.3), - generate_partially_sorted_data(10000, 0.3), - ]; - - let results = run_benchmark(test_data); - - // Calculate metrics - let all_correct = results.correctness.iter().all(|&c| c); - let correctness_score = if all_correct { 1.0 } else { 0.0 }; - - let avg_time: f64 = results.times.iter().sum::() / results.times.len() as f64; - - // Performance score (normalized, assuming baseline of 0.1 seconds for largest dataset) - let performance_score = 1.0 / (1.0 + avg_time * 10.0); - - // Output results as JSON - println!("{{"); - println!(" \\"correctness\\": {},", correctness_score); - println!(" \\"avg_time\\": {},", avg_time); - println!(" \\"performance_score\\": {},", performance_score); - println!(" \\"adaptability_score\\": {},", results.adaptability_score); - println!(" \\"times\\": {:?},", results.times); - println!(" \\"all_correct\\": {}", all_correct); - println!("}}"); -} - -fn generate_random_data(size: usize) -> Vec { - (0..size).map(|_| rand::random::() % 10000).collect() -} - -fn generate_nearly_sorted_data(size: usize, disorder_rate: f64) -> Vec { - let mut data: Vec = (0..size as i32).collect(); - let swaps = (size as f64 * disorder_rate) as usize; - - for _ in 0..swaps { - let i = rand::random::() % size; - let j = rand::random::() % size; - data.swap(i, j); - } - - data -} - -fn generate_reverse_sorted_data(size: usize) -> Vec { - (0..size as i32).rev().collect() -} - -fn generate_data_with_duplicates(size: usize, unique_values: usize) -> Vec { - (0..size).map(|_| rand::random::() % unique_values as i32).collect() -} - -fn generate_partially_sorted_data(size: usize, sorted_fraction: f64) -> Vec { - let sorted_size = (size as f64 * sorted_fraction) as usize; - let mut data = Vec::with_capacity(size); - - // Add sorted portion - data.extend((0..sorted_size as i32)); - - // Add random portion - data.extend((0..(size - sorted_size)).map(|_| rand::random::() % 10000)); - - data -} - -// Simple random implementation -mod rand { - use std::cell::Cell; - use std::time::{SystemTime, UNIX_EPOCH}; - - thread_local! { - static SEED: Cell = Cell::new( - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() - ); - } - - pub fn random() -> T - where - T: From, - { - SEED.with(|seed| { - let mut x = seed.get(); - x ^= x << 13; - x ^= x >> 7; - x ^= x << 17; - seed.set(x); - T::from(x) - }) - } -} -""" + project_source_dir = THIS_FILE_DIR / "sort_test" + main_file_source = project_source_dir / "src" / "main.rs" + with open(main_file_source, "r") as f: + main_content = f.read() main_path = project_dir / "src" / "main.rs" with open(main_path, "w") as f: f.write(main_content) + cargo_toml_source = project_source_dir / "Cargo.toml" + with open(cargo_toml_source, "r") as f: + cargo_toml_content = f.read() + cargo_toml_path = project_dir / "Cargo.toml" + with open(cargo_toml_path, "w") as f: + f.write(cargo_toml_content) + + cargo_lock_source = project_source_dir / "Cargo.lock" + with open(cargo_lock_source, "r") as f: + cargo_lock_content = f.read() + cargo_lock_path = project_dir / "Cargo.lock" + with open(cargo_lock_path, "w") as f: + f.write(cargo_lock_content) + # Build the project build_result = subprocess.run( ["cargo", "build", "--release"], @@ -305,7 +221,7 @@ async def evaluate(program_path: str) -> EvaluationResult: import sys if len(sys.argv) > 1: - result = asyncio.run(evaluate(sys.argv[1])) + result = evaluate(sys.argv[1]) print(f"Score: {result.metrics['score']:.4f}") print(f"Correctness: {result.metrics['correctness']:.4f}") print(f"Performance: {result.metrics['performance_score']:.4f}") diff --git a/examples/rust_adaptive_sort/sort_test/.gitignore b/examples/rust_adaptive_sort/sort_test/.gitignore new file mode 100644 index 00000000..9f970225 --- /dev/null +++ b/examples/rust_adaptive_sort/sort_test/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/examples/rust_adaptive_sort/sort_test/Cargo.lock b/examples/rust_adaptive_sort/sort_test/Cargo.lock new file mode 100644 index 00000000..9e84a6ad --- /dev/null +++ b/examples/rust_adaptive_sort/sort_test/Cargo.lock @@ -0,0 +1,148 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e0f6df8eaa422d97d72edcd152e1451618fed47fabbdbd5a8864167b1d4aff7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "sort_test" +version = "0.1.0" +dependencies = [ + "rand", +] + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/examples/rust_adaptive_sort/sort_test/Cargo.toml b/examples/rust_adaptive_sort/sort_test/Cargo.toml new file mode 100644 index 00000000..743fcf0e --- /dev/null +++ b/examples/rust_adaptive_sort/sort_test/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "sort_test" +version = "0.1.0" +edition = "2024" + +[dependencies] +rand = "0.9.2" diff --git a/examples/rust_adaptive_sort/sort_test/src/lib.rs b/examples/rust_adaptive_sort/sort_test/src/lib.rs new file mode 100644 index 00000000..e172d78e --- /dev/null +++ b/examples/rust_adaptive_sort/sort_test/src/lib.rs @@ -0,0 +1,156 @@ +// Adaptive Sorting Algorithm Implementation from initial_program.rs +// This program implements a sorting algorithm that can be evolved to adapt to different data patterns + +use std::cmp::Ordering; + +// EVOLVE-BLOCK-START +// Initial implementation: Simple quicksort +// This can be evolved to: +// - Hybrid algorithms (introsort, timsort-like) +// - Adaptive pivot selection +// - Special handling for nearly sorted data +// - Switching to different algorithms based on data characteristics + +pub fn adaptive_sort(arr: &mut [T]) { + if arr.len() <= 1 { + return; + } + + // Use quicksort as the base implementation + quicksort(arr, 0, arr.len() - 1); +} + +fn quicksort(arr: &mut [T], low: usize, high: usize) { + if low < high { + let pivot_index = partition(arr, low, high); + + // Recursively sort elements before and after partition + if pivot_index > 0 { + quicksort(arr, low, pivot_index - 1); + } + quicksort(arr, pivot_index + 1, high); + } +} + +fn partition(arr: &mut [T], low: usize, high: usize) -> usize { + // Choose the last element as pivot (can be evolved to use better strategies) + let pivot = arr[high].clone(); + let mut i = low; + + for j in low..high { + if arr[j] <= pivot { + arr.swap(i, j); + i += 1; + } + } + + arr.swap(i, high); + i +} + +// Helper function to detect if array is nearly sorted +fn is_nearly_sorted(arr: &[T], threshold: f64) -> bool { + if arr.len() <= 1 { + return true; + } + + let mut inversions = 0; + let max_inversions = ((arr.len() * (arr.len() - 1)) / 2) as f64 * threshold; + + for i in 0..arr.len() - 1 { + for j in i + 1..arr.len() { + if arr[i] > arr[j] { + inversions += 1; + if inversions as f64 > max_inversions { + return false; + } + } + } + } + + true +} + +// Helper function for insertion sort (useful for small arrays) +fn insertion_sort(arr: &mut [T]) { + for i in 1..arr.len() { + let mut j = i; + while j > 0 && arr[j - 1] > arr[j] { + arr.swap(j, j - 1); + j -= 1; + } + } +} +// EVOLVE-BLOCK-END + +// Benchmark function to test the sort implementation +pub fn run_benchmark(test_data: Vec>) -> BenchmarkResults { + let mut results = BenchmarkResults { + times: Vec::new(), + correctness: Vec::new(), + adaptability_score: 0.0, + }; + + for data in test_data { + let mut arr = data.clone(); + let start = std::time::Instant::now(); + + adaptive_sort(&mut arr); + + let elapsed = start.elapsed(); + results.times.push(elapsed.as_secs_f64()); + + // Check if correctly sorted + let is_sorted = arr.windows(2).all(|w| w[0] <= w[1]); + results.correctness.push(is_sorted); + } + + // Calculate adaptability score based on performance variance + if results.times.len() > 1 { + let mean_time: f64 = results.times.iter().sum::() / results.times.len() as f64; + let variance: f64 = results + .times + .iter() + .map(|t| (t - mean_time).powi(2)) + .sum::() + / results.times.len() as f64; + + // Lower variance means better adaptability + results.adaptability_score = 1.0 / (1.0 + variance.sqrt()); + } + + results +} + +#[derive(Debug)] +pub struct BenchmarkResults { + pub times: Vec, + pub correctness: Vec, + pub adaptability_score: f64, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_sort() { + let mut arr = vec![3, 1, 4, 1, 5, 9, 2, 6]; + adaptive_sort(&mut arr); + assert_eq!(arr, vec![1, 1, 2, 3, 4, 5, 6, 9]); + } + + #[test] + fn test_empty_array() { + let mut arr: Vec = vec![]; + adaptive_sort(&mut arr); + assert_eq!(arr, vec![]); + } + + #[test] + fn test_single_element() { + let mut arr = vec![42]; + adaptive_sort(&mut arr); + assert_eq!(arr, vec![42]); + } +} diff --git a/examples/rust_adaptive_sort/sort_test/src/main.rs b/examples/rust_adaptive_sort/sort_test/src/main.rs new file mode 100644 index 00000000..83487f44 --- /dev/null +++ b/examples/rust_adaptive_sort/sort_test/src/main.rs @@ -0,0 +1,88 @@ +use rand::prelude::*; +use sort_test::{adaptive_sort, run_benchmark}; +use std::time::Instant; + +fn main() { + // Generate test datasets with different characteristics + let test_data = vec![ + // Random data + generate_random_data(1000), + generate_random_data(10000), + // Nearly sorted data + generate_nearly_sorted_data(1000, 0.05), + generate_nearly_sorted_data(10000, 0.05), + // Reverse sorted data + generate_reverse_sorted_data(1000), + generate_reverse_sorted_data(10000), + // Data with many duplicates + generate_data_with_duplicates(1000, 10), + generate_data_with_duplicates(10000, 100), + // Partially sorted data + generate_partially_sorted_data(1000, 0.3), + generate_partially_sorted_data(10000, 0.3), + ]; + + let results = run_benchmark(test_data); + + // Calculate metrics + let all_correct = results.correctness.iter().all(|&c| c); + let correctness_score = if all_correct { 1.0 } else { 0.0 }; + + let avg_time: f64 = results.times.iter().sum::() / results.times.len() as f64; + + // Performance score (normalized, assuming baseline of 0.1 seconds for largest dataset) + let performance_score = 1.0 / (1.0 + avg_time * 10.0); + + // Output results as JSON + println!("{{"); + println!(" \"correctness\": {},", correctness_score); + println!(" \"avg_time\": {},", avg_time); + println!(" \"performance_score\": {},", performance_score); + println!(" \"adaptability_score\": {},", results.adaptability_score); + println!(" \"times\": {:?},", results.times); + println!(" \"all_correct\": {}", all_correct); + println!("}}"); +} + +fn generate_random_data(size: usize) -> Vec { + let mut rng = rand::rng(); + (0..size).map(|_| rng.random::() % 10000).collect() +} + +fn generate_nearly_sorted_data(size: usize, disorder_rate: f64) -> Vec { + let mut data: Vec = (0..size as i32).collect(); + let swaps = (size as f64 * disorder_rate) as usize; + let mut rng = rand::rng(); + for _ in 0..swaps { + let i = rng.random::() as usize % size; + let j = rng.random::() as usize % size; + data.swap(i, j); + } + + data +} + +fn generate_reverse_sorted_data(size: usize) -> Vec { + (0..size as i32).rev().collect() +} + +fn generate_data_with_duplicates(size: usize, unique_values: usize) -> Vec { + let mut rng = rand::rng(); + (0..size) + .map(|_| rng.random::() % unique_values as i32) + .collect() +} + +fn generate_partially_sorted_data(size: usize, sorted_fraction: f64) -> Vec { + let mut rng = rand::rng(); + let sorted_size = (size as f64 * sorted_fraction) as usize; + let mut data = Vec::with_capacity(size); + + // Add sorted portion + data.extend((0..sorted_size as i32)); + + // Add random portion + data.extend((0..(size - sorted_size)).map(|_| rng.random::() % 10000)); + + data +}