 
 import asyncio
 import json
-import os
 import subprocess
 import tempfile
-import time
 from pathlib import Path
-from typing import Dict, Any, List
+from openevolve.evaluation_result import EvaluationResult
+import logging
+import os
 
-import numpy as np
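+# Absolute path to this evaluator's directory, used to locate the bundled sort_test template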
+THIS_FILE_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
+
+logger = logging.getLogger("examples.rust_adaptive_sort.evaluator")
 
-from openevolve.evaluation_result import EvaluationResult
 
 
-async def evaluate(program_path: str) -> EvaluationResult:
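+# Synchronous wrapper: runs the async evaluation and logs any error/stderr/stdout artifacts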
+def evaluate(program_path: str) -> EvaluationResult:
+    result = asyncio.run(_evaluate(program_path))
+    if "error" in result.artifacts:
+        logger.error(f"Error evaluating program: {result.artifacts['error']}")
+    if "stderr" in result.artifacts:
+        logger.error(f"Stderr: {result.artifacts['stderr']}")
+    if "stdout" in result.artifacts:
+        logger.error(f"Stdout: {result.artifacts['stdout']}")
+    return result
+
+
+async def _evaluate(program_path: str) -> EvaluationResult:
     """
     Evaluate a Rust sorting algorithm implementation.
 
@@ -41,7 +53,10 @@ async def evaluate(program_path: str) -> EvaluationResult:
     if result.returncode != 0:
         return EvaluationResult(
             metrics={"score": 0.0, "compile_success": 0.0},
-            artifacts={"error": "Failed to create Cargo project", "stderr": result.stderr},
+            artifacts={
+                "error": "Failed to create Cargo project",
+                "stderr": result.stderr,
+            },
         )
 
     # Copy the program to src/lib.rs
@@ -52,127 +67,28 @@ async def evaluate(program_path: str) -> EvaluationResult:
         dst.write(lib_content)
 
     # Create main.rs with benchmark code
-    main_content = """
-use sort_test::{adaptive_sort, run_benchmark};
-use std::time::Instant;
-
-fn main() {
-    // Generate test datasets with different characteristics
-    let test_data = vec![
-        // Random data
-        generate_random_data(1000),
-        generate_random_data(10000),
-
-        // Nearly sorted data
-        generate_nearly_sorted_data(1000, 0.05),
-        generate_nearly_sorted_data(10000, 0.05),
-
-        // Reverse sorted data
-        generate_reverse_sorted_data(1000),
-        generate_reverse_sorted_data(10000),
-
-        // Data with many duplicates
-        generate_data_with_duplicates(1000, 10),
-        generate_data_with_duplicates(10000, 100),
-
-        // Partially sorted data
-        generate_partially_sorted_data(1000, 0.3),
-        generate_partially_sorted_data(10000, 0.3),
-    ];
-
-    let results = run_benchmark(test_data);
-
-    // Calculate metrics
-    let all_correct = results.correctness.iter().all(|&c| c);
-    let correctness_score = if all_correct { 1.0 } else { 0.0 };
-
-    let avg_time: f64 = results.times.iter().sum::<f64>() / results.times.len() as f64;
-
-    // Performance score (normalized, assuming baseline of 0.1 seconds for largest dataset)
-    let performance_score = 1.0 / (1.0 + avg_time * 10.0);
-
-    // Output results as JSON
-    println!("{{");
-    println!("  \\"correctness\\": {},", correctness_score);
-    println!("  \\"avg_time\\": {},", avg_time);
-    println!("  \\"performance_score\\": {},", performance_score);
-    println!("  \\"adaptability_score\\": {},", results.adaptability_score);
-    println!("  \\"times\\": {:?},", results.times);
-    println!("  \\"all_correct\\": {}", all_correct);
-    println!("}}");
-}
-
-fn generate_random_data(size: usize) -> Vec<i32> {
-    (0..size).map(|_| rand::random::<i32>() % 10000).collect()
-}
-
-fn generate_nearly_sorted_data(size: usize, disorder_rate: f64) -> Vec<i32> {
-    let mut data: Vec<i32> = (0..size as i32).collect();
-    let swaps = (size as f64 * disorder_rate) as usize;
-
-    for _ in 0..swaps {
-        let i = rand::random::<usize>() % size;
-        let j = rand::random::<usize>() % size;
-        data.swap(i, j);
-    }
-
-    data
-}
-
-fn generate_reverse_sorted_data(size: usize) -> Vec<i32> {
-    (0..size as i32).rev().collect()
-}
-
-fn generate_data_with_duplicates(size: usize, unique_values: usize) -> Vec<i32> {
-    (0..size).map(|_| rand::random::<i32>() % unique_values as i32).collect()
-}
-
-fn generate_partially_sorted_data(size: usize, sorted_fraction: f64) -> Vec<i32> {
-    let sorted_size = (size as f64 * sorted_fraction) as usize;
-    let mut data = Vec::with_capacity(size);
-
-    // Add sorted portion
-    data.extend((0..sorted_size as i32));
-
-    // Add random portion
-    data.extend((0..(size - sorted_size)).map(|_| rand::random::<i32>() % 10000));
-
-    data
-}
-
-// Simple random implementation
-mod rand {
-    use std::cell::Cell;
-    use std::time::{SystemTime, UNIX_EPOCH};
-
-    thread_local! {
-        static SEED: Cell<u64> = Cell::new(
-            SystemTime::now()
-                .duration_since(UNIX_EPOCH)
-                .unwrap()
-                .as_secs()
-        );
-    }
-
-    pub fn random<T>() -> T
-    where
-        T: From<u64>,
-    {
-        SEED.with(|seed| {
-            let mut x = seed.get();
-            x ^= x << 13;
-            x ^= x >> 7;
-            x ^= x << 17;
-            seed.set(x);
-            T::from(x)
-        })
-    }
-}
-"""
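+    # Load the benchmark harness (main.rs) from the checked-in sort_test template project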
+    project_source_dir = THIS_FILE_DIR / "sort_test"
+    main_file_source = project_source_dir / "src" / "main.rs"
+    with open(main_file_source, "r") as f:
+        main_content = f.read()
     main_path = project_dir / "src" / "main.rs"
     with open(main_path, "w") as f:
         f.write(main_content)
 
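+    # Copy the template's Cargo.toml so the generated project gets the expected package metadata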
+    cargo_toml_source = project_source_dir / "Cargo.toml"
+    with open(cargo_toml_source, "r") as f:
+        cargo_toml_content = f.read()
+    cargo_toml_path = project_dir / "Cargo.toml"
+    with open(cargo_toml_path, "w") as f:
+        f.write(cargo_toml_content)
+
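+    # Copy Cargo.lock too, pinning dependency versions for reproducible builds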
+    cargo_lock_source = project_source_dir / "Cargo.lock"
+    with open(cargo_lock_source, "r") as f:
+        cargo_lock_content = f.read()
+    cargo_lock_path = project_dir / "Cargo.lock"
+    with open(cargo_lock_path, "w") as f:
+        f.write(cargo_lock_content)
+
     # Build the project
     build_result = subprocess.run(
         ["cargo", "build", "--release"],
@@ -305,7 +221,7 @@ async def evaluate(program_path: str) -> EvaluationResult:
     import sys
 
     if len(sys.argv) > 1:
-        result = asyncio.run(evaluate(sys.argv[1]))
+        result = evaluate(sys.argv[1])
         print(f"Score: {result.metrics['score']:.4f}")
         print(f"Correctness: {result.metrics['correctness']:.4f}")
         print(f"Performance: {result.metrics['performance_score']:.4f}")
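
For local testing, the new synchronous `evaluate` can also be driven from Python. A minimal sketch, assuming this file is importable as `evaluator`, `cargo` is on `PATH`, and a candidate program exists at `program.rs` (both file names are placeholders):

```python
# Hypothetical smoke test for the evaluator in this diff; names above are assumptions.
# The candidate must expose adaptive_sort/run_benchmark, as main.rs imports them.
from evaluator import evaluate

result = evaluate("program.rs")  # blocking; builds the Cargo project and runs the benchmark
print(result.metrics)            # score, compile_success, correctness, performance_score, ...
if "stderr" in result.artifacts:
    print(result.artifacts["stderr"])  # compiler/benchmark diagnostics on failure
```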