skip slow tests

codelion · codelion · commit 2021d0b28814 · 2025-09-05T07:07:41.000+08:00
diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
@@ -77,13 +77,14 @@ jobs:
           OPTILLM_API_KEY: optillm
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
 
-      - name: Run integration tests
+      - name: Run integration tests (excluding slow tests)
         env:
           OPENAI_API_KEY: optillm
           OPTILLM_API_KEY: optillm
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
-          pytest tests/integration -v --tb=short
+          # Run only fast integration tests; skip slow tests that require a real LLM
+          pytest tests/integration -v --tb=short -m "not slow"
           
       - name: Stop optillm server
         if: always()
diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py
@@ -35,11 +35,13 @@ def __init__(
         self.random_seed = getattr(model_cfg, "random_seed", None)
 
         # Set up API client
+        # OpenAI client requires max_retries to be int, not None
+        max_retries = self.retries if self.retries is not None else 0
         self.client = openai.OpenAI(
             api_key=self.api_key,
             base_url=self.api_base,
             timeout=self.timeout,
-            max_retries=self.retries,
+            max_retries=max_retries,
         )
 
         # Only log unique models to reduce duplication
diff --git a/pyproject.toml b/pyproject.toml
@@ -49,6 +49,13 @@ disallow_incomplete_defs = true
 [project.scripts]
 openevolve-run = "openevolve.cli:main"
 
+[tool.pytest.ini_options]
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "integration: marks tests as integration tests requiring external services"
+]
+addopts = "--strict-markers"
+
 [tool.setuptools.packages.find]
 include = ["openevolve*"]
 
diff --git a/tests/integration/test_checkpoint_with_llm.py b/tests/integration/test_checkpoint_with_llm.py
@@ -10,6 +10,7 @@
 class TestCheckpointWithLLM:
     """Test checkpoints with real LLM generation"""
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_checkpoint_intervals_with_real_llm(
         self,
@@ -52,6 +53,7 @@ async def test_checkpoint_intervals_with_real_llm(
         if len(controller.database.programs) > 1:  # More than just initial program
             assert len(checkpoint_calls) > 0, "Should have at least one checkpoint call if evolution succeeded"
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_checkpoint_resume_functionality(
         self,
@@ -94,6 +96,7 @@ async def test_checkpoint_resume_functionality(
         else:
             print("No checkpoints directory created")
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_final_checkpoint_creation(
         self,
@@ -127,6 +130,7 @@ async def test_final_checkpoint_creation(
         # This depends on the controller logic, so we just verify the system didn't crash
         assert len(controller.database.programs) >= 1, "Should have at least the initial program"
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_checkpoint_with_best_program_save(
         self,
diff --git a/tests/integration/test_evolution_pipeline.py b/tests/integration/test_evolution_pipeline.py
@@ -10,6 +10,7 @@
 class TestEvolutionPipeline:
     """Test complete evolution with real LLM generation"""
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_full_evolution_loop(
         self, 
@@ -61,6 +62,7 @@ async def test_full_evolution_loop(
         populated_islands = [i for i, count in island_counts.items() if count > 0]
         assert len(populated_islands) >= 1, "At least one island should have programs"
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_island_feature_maps_populated(
         self,
@@ -98,6 +100,7 @@ async def test_island_feature_maps_populated(
                 assert program_id in controller.database.programs, \
                     f"Program {program_id} in island {island_idx} feature map not found in database"
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_evolution_with_small_model_succeeds(
         self,
@@ -134,6 +137,7 @@ async def test_evolution_with_small_model_succeeds(
             # It's okay if no log files - depends on config
             print(f"Found {len(log_files)} log files")
 
+    @pytest.mark.slow
     @pytest.mark.asyncio 
     async def test_best_program_tracking(
         self,
diff --git a/tests/integration/test_library_api.py b/tests/integration/test_library_api.py
@@ -43,6 +43,7 @@ def _get_library_test_config(port: int = 8000) -> Config:
 class TestLibraryAPIIntegration:
     """Test OpenEvolve library API with real LLM integration"""
 
+    @pytest.mark.slow
     def test_evolve_function_real_integration(
         self,
         optillm_server,
@@ -100,6 +101,7 @@ def simple_multiply(x, y):
         print(f"   Output dir: {result.output_dir}")
         print(f"   Code length: {len(result.best_code)} chars")
 
+    @pytest.mark.slow
     def test_evolve_code_real_integration(
         self,
         optillm_server,
@@ -185,6 +187,7 @@ def fibonacci_evaluator(program_path):
         print(f"   Best score: {result.best_score}")
         print(f"   Output dir: {result.output_dir}")
 
+    @pytest.mark.slow
     def test_run_evolution_real_integration(
         self,
         optillm_server,
diff --git a/tests/integration/test_migration_with_llm.py b/tests/integration/test_migration_with_llm.py
@@ -10,6 +10,7 @@
 class TestMigrationWithLLM:
     """Test island migration with real LLM generation"""
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_island_migration_no_duplicates_real_evolution(
         self,
@@ -70,6 +71,7 @@ async def test_island_migration_no_duplicates_real_evolution(
                 assert "_migrant" not in migrant.id, \
                     f"Migrant program {migrant.id} has _migrant suffix"
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_per_island_map_elites_isolation(
         self,
@@ -112,6 +114,7 @@ async def test_per_island_map_elites_isolation(
                 assert program_island == island_idx, \
                     f"Program {program_id} island mismatch: in map {island_idx} but metadata says {program_island}"
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_migration_preserves_program_quality(
         self,
@@ -162,6 +165,7 @@ async def test_migration_preserves_program_quality(
             # Most importantly: no _migrant_ suffix
             assert "_migrant" not in migrant.id, f"Migrant {migrant.id} should not have _migrant suffix"
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_migration_timing_logic(
         self,
@@ -206,6 +210,7 @@ async def test_migration_timing_logic(
             # but the system should have at least considered it
             print(f"Migration should have been considered (max gen: {max_generation})")
 
+    @pytest.mark.slow
     @pytest.mark.asyncio
     async def test_single_island_no_migration(
         self,
diff --git a/tests/integration/test_migration_with_llm.py.bak b/tests/integration/test_migration_with_llm.py.bak
diff --git a/tests/integration/test_smoke.py b/tests/integration/test_smoke.py
diff --git a/tests/test_llm_ensemble.py b/tests/test_llm_ensemble.py