"""
Integration tests for OpenEvolve library API with real LLM inference
Tests the end-to-end flow of using OpenEvolve as a library
"""
5+
import shutil
import tempfile
from pathlib import Path

import pytest

from openevolve import evolve_algorithm, evolve_code, evolve_function, run_evolution
12+
13+
class TestLibraryAPIIntegration:
    """Test OpenEvolve library API with real LLM integration.

    Each test drives one public entry point (``evolve_function``,
    ``evolve_code``, ``run_evolution``) end-to-end against the
    ``optillm_server`` fixture, using tiny iteration counts so the
    suite stays fast. ``temp_workspace`` supplies an isolated directory.
    """

    @pytest.mark.asyncio
    async def test_evolve_function_real_integration(self, optillm_server, temp_workspace):
        """Test evolve_function with real optillm server - simple optimization task."""

        def simple_multiply(x, y):
            """A simple function that can be optimized."""
            # Deliberately inefficient implementation the LLM can improve.
            result = 0
            for _ in range(x):
                result += y
            return result

        # Test cases - the function should return x * y.
        test_cases = [
            ((2, 3), 6),
            ((4, 5), 20),
            ((1, 7), 7),
            ((0, 10), 0),
        ]

        print("Testing evolve_function with real LLM...")

        # Run evolution with minimal iterations for testing.
        result = evolve_function(
            simple_multiply,
            test_cases,
            iterations=3,  # Small number for fast testing
            output_dir=str(temp_workspace / "evolve_function_output"),
            cleanup=False,  # Keep files for inspection
        )

        # Verify the result structure.
        assert result is not None
        assert hasattr(result, "best_score")
        assert hasattr(result, "best_code")
        assert hasattr(result, "metrics")
        assert hasattr(result, "output_dir")

        # Basic checks.
        assert result.best_score >= 0.0
        assert "def simple_multiply" in result.best_code
        assert result.output_dir == str(temp_workspace / "evolve_function_output")

        # Check that the output directory was created.
        output_path = Path(result.output_dir)
        assert output_path.exists()
        assert (output_path / "best").exists()

        print("✅ evolve_function completed successfully!")
        print(f"   Best score: {result.best_score}")
        print(f"   Output dir: {result.output_dir}")
        print(f"   Code length: {len(result.best_code)} chars")

    @pytest.mark.asyncio
    async def test_evolve_code_real_integration(self, optillm_server, temp_workspace):
        """Test evolve_code with real optillm server - code string optimization."""

        # Initial code that can be optimized.
        initial_code = """
# EVOLVE-BLOCK-START
def fibonacci(n):
    # Inefficient recursive implementation
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
# EVOLVE-BLOCK-END
"""

        def fibonacci_evaluator(program_path):
            """Simple evaluator for the evolved fibonacci function.

            Loads the candidate program from *program_path* and scores it
            by accuracy on a handful of known fibonacci values.
            """
            try:
                # Import the evolved program from its file path.
                import importlib.util

                spec = importlib.util.spec_from_file_location("evolved", program_path)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)

                if hasattr(module, "fibonacci"):
                    fib = module.fibonacci

                    # (input, expected) pairs for the first fibonacci numbers.
                    test_cases = [(0, 0), (1, 1), (2, 1), (3, 2), (4, 3), (5, 5)]

                    correct = 0
                    for input_val, expected in test_cases:
                        try:
                            if fib(input_val) == expected:
                                correct += 1
                        except Exception:
                            # An evolved candidate may raise; count it as a failure.
                            pass

                    accuracy = correct / len(test_cases)
                    return {
                        "score": accuracy,
                        "correctness": accuracy,
                        "test_cases_passed": correct,
                    }
                else:
                    return {"score": 0.0, "error": "fibonacci function not found"}

            except Exception as e:
                return {"score": 0.0, "error": str(e)}

        print("Testing evolve_code with real LLM...")

        # Run evolution.
        result = evolve_code(
            initial_code,
            fibonacci_evaluator,
            iterations=2,  # Small number for fast testing
            output_dir=str(temp_workspace / "evolve_code_output"),
        )

        # Verify result structure; the evolve markers must survive evolution.
        assert result is not None
        assert result.best_score >= 0.0
        assert "fibonacci" in result.best_code.lower()
        assert "# EVOLVE-BLOCK-START" in result.best_code
        assert "# EVOLVE-BLOCK-END" in result.best_code

        # Check output directory.
        output_path = Path(result.output_dir)
        assert output_path.exists()

        print("✅ evolve_code completed successfully!")
        print(f"   Best score: {result.best_score}")
        print(f"   Output dir: {result.output_dir}")

    @pytest.mark.asyncio
    async def test_run_evolution_real_integration(self, optillm_server, temp_workspace):
        """Test run_evolution with real optillm server - basic program evolution."""

        # Create the initial program file.
        initial_program = temp_workspace / "initial_program.py"
        initial_program.write_text(
            """
# Simple sorting program to evolve
# EVOLVE-BLOCK-START
def sort_numbers(numbers):
    # Basic bubble sort implementation
    n = len(numbers)
    for i in range(n):
        for j in range(0, n - i - 1):
            if numbers[j] > numbers[j + 1]:
                numbers[j], numbers[j + 1] = numbers[j + 1], numbers[j]
    return numbers
# EVOLVE-BLOCK-END
"""
        )

        # Create the evaluator file.
        evaluator_file = temp_workspace / "evaluator.py"
        evaluator_file.write_text(
            """
def evaluate(program_path):
    \"\"\"Evaluate sorting function performance\"\"\"
    try:
        import importlib.util
        spec = importlib.util.spec_from_file_location("program", program_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if hasattr(module, 'sort_numbers'):
            sort_func = module.sort_numbers

            # Test cases
            test_cases = [
                [3, 1, 4, 1, 5],
                [9, 2, 6, 5, 3],
                [1],
                [],
                [2, 1]
            ]

            correct = 0
            for test_case in test_cases:
                try:
                    input_copy = test_case.copy()
                    result = sort_func(input_copy)
                    expected = sorted(test_case)
                    if result == expected:
                        correct += 1
                except Exception:
                    pass

            accuracy = correct / len(test_cases) if test_cases else 0
            return {
                "score": accuracy,
                "correctness": accuracy,
                "complexity": 10,  # Fixed complexity for simplicity
            }
        else:
            return {"score": 0.0, "error": "sort_numbers function not found"}

    except Exception as e:
        return {"score": 0.0, "error": str(e)}
"""
        )

        print("Testing run_evolution with real LLM...")

        # Run evolution using file paths (most common usage).
        result = run_evolution(
            initial_program=str(initial_program),
            evaluator=str(evaluator_file),
            iterations=2,
            output_dir=str(temp_workspace / "run_evolution_output"),
        )

        # Verify result.
        assert result is not None
        assert result.best_score >= 0.0
        assert "sort_numbers" in result.best_code

        # Check that files were created.
        output_path = Path(result.output_dir)
        assert output_path.exists()
        assert (output_path / "best").exists()
        assert (output_path / "checkpoints").exists()

        print("✅ run_evolution completed successfully!")
        print(f"   Best score: {result.best_score}")
        print(f"   Output dir: {result.output_dir}")

        # Test string input as well.
        print("Testing run_evolution with string inputs...")

        result2 = run_evolution(
            initial_program=initial_program.read_text(),
            evaluator=lambda path: {"score": 0.8, "test": "passed"},  # Simple callable evaluator
            iterations=1,
            output_dir=str(temp_workspace / "run_evolution_string_output"),
        )

        assert result2 is not None
        assert result2.best_score >= 0.0

        print("✅ run_evolution with string inputs completed!")
267+
268+
@pytest.fixture
def temp_workspace():
    """Yield a temporary workspace directory, removing it after the test."""
    workspace_root = tempfile.mkdtemp()
    yield Path(workspace_root)
    # Teardown: best-effort removal so a locked file can't fail the suite.
    shutil.rmtree(workspace_root, ignore_errors=True)