6 changes: 3 additions & 3 deletions examples/rust_adaptive_sort/config.yaml
@@ -8,14 +8,14 @@ file_suffix: ".rs"

 # LLM configuration
 llm:
-  primary_model: "gemini-2.5-flash-lite-preview-06-17"
+  primary_model: "gemini-flash-lite-latest"
   primary_model_weight: 0.8
-  secondary_model: "gemini-2.5-flash"
+  secondary_model: "gemini-flash-latest"
   secondary_model_weight: 0.2
   api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
 
   temperature: 0.7
-  max_tokens: 4096
+  max_tokens: 16384
 
 # Custom system message for Rust performance programming
 system_message: |
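
The primary_model_weight / secondary_model_weight pair above implies an 80/20 sampling mix between the two Gemini models. As a minimal sketch of that idea, assuming the weights are used as sampling probabilities (a hypothetical helper, not openevolve's actual selection code):

import random

# Hypothetical: treat the config weights as sampling probabilities.
# Model names and weights come from the config diff above.
MODELS = [
    ("gemini-flash-lite-latest", 0.8),  # primary_model / primary_model_weight
    ("gemini-flash-latest", 0.2),       # secondary_model / secondary_model_weight
]

def pick_model(rng: random.Random | None = None) -> str:
    rng = rng or random.Random()
    names, weights = zip(*MODELS)
    return rng.choices(names, weights=weights, k=1)[0]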
166 changes: 41 additions & 125 deletions examples/rust_adaptive_sort/evaluator.py
@@ -4,19 +4,31 @@

 import asyncio
 import json
-import os
 import subprocess
 import tempfile
 import time
 from pathlib import Path
 from typing import Dict, Any, List
-from openevolve.evaluation_result import EvaluationResult
+import logging
+import os
 
 import numpy as np
+THIS_FILE_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
+
+logger = logging.getLogger("examples.rust_adaptive_sort.evaluator")
+
+from openevolve.evaluation_result import EvaluationResult
 
 
-async def evaluate(program_path: str) -> EvaluationResult:
+def evaluate(program_path: str) -> EvaluationResult:
+    result = asyncio.run(_evaluate(program_path))
+    if "error" in result.artifacts:
+        logger.error(f"Error evaluating program: {result.artifacts['error']}")
+    if "stderr" in result.artifacts:
+        logger.error(f"Stderr: {result.artifacts['stderr']}")
+    if "stdout" in result.artifacts:
+        logger.error(f"Stdout: {result.artifacts['stdout']}")
+    return result
+
+
+async def _evaluate(program_path: str) -> EvaluationResult:
     """
     Evaluate a Rust sorting algorithm implementation.

@@ -41,7 +53,10 @@ async def evaluate(program_path: str) -> EvaluationResult:
             if result.returncode != 0:
                 return EvaluationResult(
                     metrics={"score": 0.0, "compile_success": 0.0},
-                    artifacts={"error": "Failed to create Cargo project", "stderr": result.stderr},
+                    artifacts={
+                        "error": "Failed to create Cargo project",
+                        "stderr": result.stderr,
+                    },
                 )
 
             # Copy the program to src/lib.rs
@@ -52,127 +67,28 @@ async def evaluate(program_path: str) -> EvaluationResult:
                 dst.write(lib_content)
 
             # Create main.rs with benchmark code
-            main_content = """
-use sort_test::{adaptive_sort, run_benchmark};
-use std::time::Instant;
-
-fn main() {
-    // Generate test datasets with different characteristics
-    let test_data = vec![
-        // Random data
-        generate_random_data(1000),
-        generate_random_data(10000),
-
-        // Nearly sorted data
-        generate_nearly_sorted_data(1000, 0.05),
-        generate_nearly_sorted_data(10000, 0.05),
-
-        // Reverse sorted data
-        generate_reverse_sorted_data(1000),
-        generate_reverse_sorted_data(10000),
-
-        // Data with many duplicates
-        generate_data_with_duplicates(1000, 10),
-        generate_data_with_duplicates(10000, 100),
-
-        // Partially sorted data
-        generate_partially_sorted_data(1000, 0.3),
-        generate_partially_sorted_data(10000, 0.3),
-    ];
-
-    let results = run_benchmark(test_data);
-
-    // Calculate metrics
-    let all_correct = results.correctness.iter().all(|&c| c);
-    let correctness_score = if all_correct { 1.0 } else { 0.0 };
-
-    let avg_time: f64 = results.times.iter().sum::<f64>() / results.times.len() as f64;
-
-    // Performance score (normalized, assuming baseline of 0.1 seconds for largest dataset)
-    let performance_score = 1.0 / (1.0 + avg_time * 10.0);
-
-    // Output results as JSON
-    println!("{{");
-    println!(" \\"correctness\\": {},", correctness_score);
-    println!(" \\"avg_time\\": {},", avg_time);
-    println!(" \\"performance_score\\": {},", performance_score);
-    println!(" \\"adaptability_score\\": {},", results.adaptability_score);
-    println!(" \\"times\\": {:?},", results.times);
-    println!(" \\"all_correct\\": {}", all_correct);
-    println!("}}");
-}
-
-fn generate_random_data(size: usize) -> Vec<i32> {
-    (0..size).map(|_| rand::random::<i32>() % 10000).collect()
-}
-
-fn generate_nearly_sorted_data(size: usize, disorder_rate: f64) -> Vec<i32> {
-    let mut data: Vec<i32> = (0..size as i32).collect();
-    let swaps = (size as f64 * disorder_rate) as usize;
-
-    for _ in 0..swaps {
-        let i = rand::random::<usize>() % size;
-        let j = rand::random::<usize>() % size;
-        data.swap(i, j);
-    }
-
-    data
-}
-
-fn generate_reverse_sorted_data(size: usize) -> Vec<i32> {
-    (0..size as i32).rev().collect()
-}
-
-fn generate_data_with_duplicates(size: usize, unique_values: usize) -> Vec<i32> {
-    (0..size).map(|_| rand::random::<i32>() % unique_values as i32).collect()
-}
-
-fn generate_partially_sorted_data(size: usize, sorted_fraction: f64) -> Vec<i32> {
-    let sorted_size = (size as f64 * sorted_fraction) as usize;
-    let mut data = Vec::with_capacity(size);
-
-    // Add sorted portion
-    data.extend((0..sorted_size as i32));
-
-    // Add random portion
-    data.extend((0..(size - sorted_size)).map(|_| rand::random::<i32>() % 10000));
-
-    data
-}
-
-// Simple random implementation
-mod rand {
-    use std::cell::Cell;
-    use std::time::{SystemTime, UNIX_EPOCH};
-
-    thread_local! {
-        static SEED: Cell<u64> = Cell::new(
-            SystemTime::now()
-                .duration_since(UNIX_EPOCH)
-                .unwrap()
-                .as_secs()
-        );
-    }
-
-    pub fn random<T>() -> T
-    where
-        T: From<u64>,
-    {
-        SEED.with(|seed| {
-            let mut x = seed.get();
-            x ^= x << 13;
-            x ^= x >> 7;
-            x ^= x << 17;
-            seed.set(x);
-            T::from(x)
-        })
-    }
-}
-"""
+            project_source_dir = THIS_FILE_DIR / "sort_test"
+            main_file_source = project_source_dir / "src" / "main.rs"
+            with open(main_file_source, "r") as f:
+                main_content = f.read()
+            main_path = project_dir / "src" / "main.rs"
+            with open(main_path, "w") as f:
+                f.write(main_content)
+
+            cargo_toml_source = project_source_dir / "Cargo.toml"
+            with open(cargo_toml_source, "r") as f:
+                cargo_toml_content = f.read()
+            cargo_toml_path = project_dir / "Cargo.toml"
+            with open(cargo_toml_path, "w") as f:
+                f.write(cargo_toml_content)
+
+            cargo_lock_source = project_source_dir / "Cargo.lock"
+            with open(cargo_lock_source, "r") as f:
+                cargo_lock_content = f.read()
+            cargo_lock_path = project_dir / "Cargo.lock"
+            with open(cargo_lock_path, "w") as f:
+                f.write(cargo_lock_content)
 
             # Build the project
             build_result = subprocess.run(
                 ["cargo", "build", "--release"],
@@ -305,7 +221,7 @@ async def evaluate(program_path: str) -> EvaluationResult:
     import sys
 
     if len(sys.argv) > 1:
-        result = asyncio.run(evaluate(sys.argv[1]))
+        result = evaluate(sys.argv[1])
         print(f"Score: {result.metrics['score']:.4f}")
         print(f"Correctness: {result.metrics['correctness']:.4f}")
         print(f"Performance: {result.metrics['performance_score']:.4f}")
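
The unchanged middle of evaluator.py (collapsed in this diff) builds the project and consumes the JSON object that main.rs prints to stdout. A hypothetical sketch of that step, assuming stdout is exactly the JSON shown in the removed benchmark code; the real evaluator's parsing and scoring may differ:

import json
import subprocess

def run_sort_benchmark(binary_path: str) -> dict:
    # Run the release binary produced by `cargo build --release` and parse
    # its stdout. The key names ("correctness", "avg_time",
    # "performance_score", "adaptability_score", "all_correct") follow the
    # println! calls in sort_test/src/main.rs.
    proc = subprocess.run([binary_path], capture_output=True, text=True, timeout=60)
    proc.check_returncode()
    return json.loads(proc.stdout)

Copying a pre-made sort_test project (with its Cargo.toml and Cargo.lock) into each temporary build directory also pins the dependency graph, so every evaluation compiles against the same rand 0.9.2 instead of the hand-rolled xorshift module deleted above.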
1 change: 1 addition & 0 deletions examples/rust_adaptive_sort/sort_test/.gitignore
@@ -0,0 +1 @@
+target/
148 changes: 148 additions & 0 deletions examples/rust_adaptive_sort/sort_test/Cargo.lock

Some generated files are not rendered by default.

7 changes: 7 additions & 0 deletions examples/rust_adaptive_sort/sort_test/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "sort_test"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+rand = "0.9.2"