"""
Integration tests for OpenEvolve library API with real LLM inference
Tests the end-to-end flow of using OpenEvolve as a library
"""
5+
import shutil
import tempfile
from pathlib import Path

import pytest

from openevolve import evolve_algorithm, evolve_code, evolve_function, run_evolution
12+
13+
class TestLibraryAPIIntegration:
    """Test OpenEvolve library API with real LLM integration.

    Each test drives one public entry point (``evolve_function``,
    ``evolve_code``, ``run_evolution``) end-to-end against the
    ``optillm_server`` fixture, using tiny iteration counts so the
    suite stays fast. ``temp_workspace`` supplies an isolated directory.
    """

    @pytest.mark.asyncio
    async def test_evolve_function_real_integration(self, optillm_server, temp_workspace):
        """Test evolve_function with real optillm server - simple optimization task."""

        def simple_multiply(x, y):
            """A simple function that can be optimized."""
            # Deliberately inefficient implementation the LLM can improve.
            result = 0
            for _ in range(x):
                result += y
            return result

        # Test cases - the function should return x * y.
        test_cases = [
            ((2, 3), 6),
            ((4, 5), 20),
            ((1, 7), 7),
            ((0, 10), 0),
        ]

        print("Testing evolve_function with real LLM...")

        # Run evolution with minimal iterations for testing.
        result = evolve_function(
            simple_multiply,
            test_cases,
            iterations=3,  # Small number for fast testing
            output_dir=str(temp_workspace / "evolve_function_output"),
            cleanup=False,  # Keep files for inspection
        )

        # Verify the result structure.
        assert result is not None
        assert hasattr(result, "best_score")
        assert hasattr(result, "best_code")
        assert hasattr(result, "metrics")
        assert hasattr(result, "output_dir")

        # Basic checks.
        assert result.best_score >= 0.0
        assert "def simple_multiply" in result.best_code
        assert result.output_dir == str(temp_workspace / "evolve_function_output")

        # Check that the output directory was created.
        output_path = Path(result.output_dir)
        assert output_path.exists()
        assert (output_path / "best").exists()

        print("✅ evolve_function completed successfully!")
        print(f"   Best score: {result.best_score}")
        print(f"   Output dir: {result.output_dir}")
        print(f"   Code length: {len(result.best_code)} chars")

    @pytest.mark.asyncio
    async def test_evolve_code_real_integration(self, optillm_server, temp_workspace):
        """Test evolve_code with real optillm server - code string optimization."""

        # Initial code that can be optimized.
        initial_code = """
# EVOLVE-BLOCK-START
def fibonacci(n):
    # Inefficient recursive implementation
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
# EVOLVE-BLOCK-END
"""

        def fibonacci_evaluator(program_path):
            """Simple evaluator for the evolved fibonacci function.

            Loads the candidate program from *program_path* and scores it
            by accuracy on a handful of known fibonacci values.
            """
            try:
                # Import the evolved program from its file path.
                import importlib.util

                spec = importlib.util.spec_from_file_location("evolved", program_path)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)

                if hasattr(module, "fibonacci"):
                    fib = module.fibonacci

                    # (input, expected) pairs for the first fibonacci numbers.
                    test_cases = [(0, 0), (1, 1), (2, 1), (3, 2), (4, 3), (5, 5)]

                    correct = 0
                    for input_val, expected in test_cases:
                        try:
                            if fib(input_val) == expected:
                                correct += 1
                        except Exception:
                            # An evolved candidate may raise; count it as a failure.
                            pass

                    accuracy = correct / len(test_cases)
                    return {
                        "score": accuracy,
                        "correctness": accuracy,
                        "test_cases_passed": correct,
                    }
                else:
                    return {"score": 0.0, "error": "fibonacci function not found"}

            except Exception as e:
                return {"score": 0.0, "error": str(e)}

        print("Testing evolve_code with real LLM...")

        # Run evolution.
        result = evolve_code(
            initial_code,
            fibonacci_evaluator,
            iterations=2,  # Small number for fast testing
            output_dir=str(temp_workspace / "evolve_code_output"),
        )

        # Verify result structure; the evolve markers must survive evolution.
        assert result is not None
        assert result.best_score >= 0.0
        assert "fibonacci" in result.best_code.lower()
        assert "# EVOLVE-BLOCK-START" in result.best_code
        assert "# EVOLVE-BLOCK-END" in result.best_code

        # Check output directory.
        output_path = Path(result.output_dir)
        assert output_path.exists()

        print("✅ evolve_code completed successfully!")
        print(f"   Best score: {result.best_score}")
        print(f"   Output dir: {result.output_dir}")

    @pytest.mark.asyncio
    async def test_run_evolution_real_integration(self, optillm_server, temp_workspace):
        """Test run_evolution with real optillm server - basic program evolution."""

        # Create the initial program file.
        initial_program = temp_workspace / "initial_program.py"
        initial_program.write_text(
            """
# Simple sorting program to evolve
# EVOLVE-BLOCK-START
def sort_numbers(numbers):
    # Basic bubble sort implementation
    n = len(numbers)
    for i in range(n):
        for j in range(0, n - i - 1):
            if numbers[j] > numbers[j + 1]:
                numbers[j], numbers[j + 1] = numbers[j + 1], numbers[j]
    return numbers
# EVOLVE-BLOCK-END
"""
        )

        # Create the evaluator file.
        evaluator_file = temp_workspace / "evaluator.py"
        evaluator_file.write_text(
            """
def evaluate(program_path):
    \"\"\"Evaluate sorting function performance\"\"\"
    try:
        import importlib.util
        spec = importlib.util.spec_from_file_location("program", program_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if hasattr(module, 'sort_numbers'):
            sort_func = module.sort_numbers

            # Test cases
            test_cases = [
                [3, 1, 4, 1, 5],
                [9, 2, 6, 5, 3],
                [1],
                [],
                [2, 1]
            ]

            correct = 0
            for test_case in test_cases:
                try:
                    input_copy = test_case.copy()
                    result = sort_func(input_copy)
                    expected = sorted(test_case)
                    if result == expected:
                        correct += 1
                except Exception:
                    pass

            accuracy = correct / len(test_cases) if test_cases else 0
            return {
                "score": accuracy,
                "correctness": accuracy,
                "complexity": 10,  # Fixed complexity for simplicity
            }
        else:
            return {"score": 0.0, "error": "sort_numbers function not found"}

    except Exception as e:
        return {"score": 0.0, "error": str(e)}
"""
        )

        print("Testing run_evolution with real LLM...")

        # Run evolution using file paths (most common usage).
        result = run_evolution(
            initial_program=str(initial_program),
            evaluator=str(evaluator_file),
            iterations=2,
            output_dir=str(temp_workspace / "run_evolution_output"),
        )

        # Verify result.
        assert result is not None
        assert result.best_score >= 0.0
        assert "sort_numbers" in result.best_code

        # Check that files were created.
        output_path = Path(result.output_dir)
        assert output_path.exists()
        assert (output_path / "best").exists()
        assert (output_path / "checkpoints").exists()

        print("✅ run_evolution completed successfully!")
        print(f"   Best score: {result.best_score}")
        print(f"   Output dir: {result.output_dir}")

        # Test string input as well.
        print("Testing run_evolution with string inputs...")

        result2 = run_evolution(
            initial_program=initial_program.read_text(),
            evaluator=lambda path: {"score": 0.8, "test": "passed"},  # Simple callable evaluator
            iterations=1,
            output_dir=str(temp_workspace / "run_evolution_string_output"),
        )

        assert result2 is not None
        assert result2.best_score >= 0.0

        print("✅ run_evolution with string inputs completed!")
267+
268+
@pytest.fixture
def temp_workspace():
    """Yield a temporary workspace directory, removing it after the test."""
    workspace_root = tempfile.mkdtemp()
    yield Path(workspace_root)
    # Teardown: best-effort removal so a locked file can't fail the suite.
    shutil.rmtree(workspace_root, ignore_errors=True)