
Commit 24c6ea9

fix: e2e & unit with main merged
1 parent f3887a0 commit 24c6ea9

8 files changed: +25 additions, -13 deletions

src/guidellm/benchmark/scenario.py

Lines changed: 1 addition & 0 deletions
@@ -98,6 +98,7 @@ class Config:
     ] = None
     max_seconds: Optional[PositiveFloat] = None
     max_requests: Optional[PositiveInt] = None
+    max_error: Optional[PositiveFloat] = None
     warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
     cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
     output_sampling: Optional[NonNegativeInt] = None
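
The new max_error field follows the same pattern as max_seconds: an optional positive float that is unset by default. Below is a minimal sketch of how such an Optional[PositiveFloat] field validates under pydantic; the _ScenarioSketch class is illustrative only, not guidellm's actual Scenario model.

# Minimal sketch, not guidellm's actual Scenario class: shows how an
# Optional[PositiveFloat] field such as max_error behaves under pydantic.
from typing import Optional

from pydantic import BaseModel, PositiveFloat, ValidationError


class _ScenarioSketch(BaseModel):  # hypothetical stand-in for the real model
    max_error: Optional[PositiveFloat] = None


print(_ScenarioSketch().max_error)                # None -> no error limit configured
print(_ScenarioSketch(max_error=0.05).max_error)  # 0.05 -> accepted

try:
    _ScenarioSketch(max_error=0)                  # rejected: value must be > 0
except ValidationError as exc:
    print(exc.errors()[0]["msg"])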

tests/e2e/test_basic.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ def server():
     Pytest fixture to start and stop the server for the entire module
     using the TestServer class.
     """
-    server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b")
+    server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo")
     try:
         server.start()
         yield server  # Yield the URL for tests to use
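
Both E2E fixtures now pass mode="echo" to VllmSimServer. A hypothetical sketch of how such a keyword could be forwarded to the simulator's command line is shown below; the --mode flag name and the _SimServerSketch class are assumptions for illustration, not the repository's actual code.

# Hypothetical sketch of threading a `mode` keyword into CLI parameters,
# mirroring the shape of VllmSimServer.get_cli_parameters from this commit.
from typing import Optional


class _SimServerSketch:
    def __init__(self, port: int, model: str, mode: Optional[str] = None):
        self.port = port
        self.model = model
        self.mode = mode

    def get_cli_parameters(self) -> list[str]:
        parameters = ["--port", f"{self.port}", "--model", self.model]
        if self.mode is not None:
            parameters += ["--mode", self.mode]  # assumed flag name
        return parameters


print(_SimServerSketch(8000, "databricks/dolly-v2-12b", mode="echo").get_cli_parameters())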

tests/e2e/test_interrupted.py

Lines changed: 9 additions & 7 deletions
@@ -17,7 +17,7 @@ def server():
     Pytest fixture to start and stop the server for the entire module
     using the TestServer class.
     """
-    server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b")
+    server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo")
     try:
         server.start()
         yield server  # Yield the URL for tests to use
@@ -43,19 +43,21 @@ def test_interrupted_report(server: VllmSimServer):
         --output-path {report_path}
     """
     logger.info(f"Client command: {command}")
-    process = subprocess.Popen(["/bin/bash", "-c", command], # noqa: S603
-                               stdout=subprocess.PIPE,
-                               stderr=subprocess.PIPE,
-                               text=True)
+    process = subprocess.Popen(  # noqa: S603
+        ["/bin/bash", "-c", command],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True,
+    )
     logger.info("Waiting for client to start...")
     time.sleep(10)
     server.stop()

     try:
         logger.info("Fetching client output")
         stdout, stderr = process.communicate()
-        logger.info(f"Client stdout:\n{stdout}")
-        logger.info(f"Client stderr:\n{stderr}")
+        logger.trace(f"Client stdout:\n{stdout}")
+        logger.trace(f"Client stderr:\n{stderr}")

         assert report_path.exists()
         with report_path.open("r") as f:
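
The client output dumps move from logger.info to logger.trace. Assuming these tests log through loguru (the stdlib logging module has no trace method), TRACE sits below DEBUG and loguru's default sink does not emit it, so the verbose dumps stay hidden unless a sink opts in; a minimal sketch:

# Sketch, assuming loguru: TRACE-level messages are suppressed unless a sink
# is registered at level="TRACE" (loguru's default stderr sink starts at DEBUG).
import sys

from loguru import logger

logger.remove()                        # drop the default sink
logger.add(sys.stderr, level="TRACE")  # opt in to the verbose client output
logger.trace("Client stdout:\n...")    # now visible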

tests/e2e/vllm_sim_server.py

Lines changed: 9 additions & 0 deletions
@@ -1,5 +1,6 @@
 import subprocess
 import time
+from pathlib import Path
 from typing import Optional

 import pytest
@@ -42,6 +43,14 @@ def __init__(
         self.health_url = f"{self.server_url}/health"
         self.app_script = "./bin/llm-d-inference-sim"
         self.process: Optional[subprocess.Popen] = None
+        if not Path(self.app_script).exists():
+            message = (
+                "The vLLM simulator binary is required for E2E tests, but is missing.\n"
+                "To build it and enable E2E tests, please run:\n"
+                "docker build . -f tests/e2e/vllm-sim.Dockerfile -o type=local,dest=./"
+            )
+            logger.warning(message)
+            pytest.skip("vLLM simulator binary missing", allow_module_level=True)

     def get_cli_parameters(self) -> list[str]:
         parameters = ["--port", f"{self.port}", "--model", self.model]
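
The new guard skips the E2E tests when ./bin/llm-d-inference-sim has not been built, rather than letting every test fail. Below is a standalone sketch of the same pytest mechanism; the binary path and skip reason are taken from the diff above, and the snippet is illustrative rather than additional repository code.

# Standalone illustration of pytest's module-level skip: calling
# pytest.skip(..., allow_module_level=True) at import time marks every
# test in the module as skipped instead of letting them error out.
from pathlib import Path

import pytest

SIM_BINARY = Path("./bin/llm-d-inference-sim")  # path from vllm_sim_server.py

if not SIM_BINARY.exists():
    pytest.skip("vLLM simulator binary missing", allow_module_level=True)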

tests/unit/entrypoints/assets/benchmarks_stripped.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

tests/unit/entrypoints/assets/benchmarks_stripped.yaml

Lines changed: 2 additions & 0 deletions
@@ -25,6 +25,7 @@ benchmarks:
     type_: synchronous
     max_number:
     max_duration: 30
+    max_error:
     warmup_number:
     warmup_duration:
     cooldown_number:
@@ -47,6 +48,7 @@ benchmarks:
     request_start_time_targeted_delay_avg: 0.6319856542222043
     request_time_delay_avg: 0.00029866238857837433
     request_time_avg: 0.6370967195389119
+    error_rate: 0.0
   worker:
     type_: generative_requests_worker
     backend_type: openai_http

tests/unit/entrypoints/assets/benchmarks_stripped_output.txt

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@ Benchmarks Metadata:
     Duration:30.2 seconds
     Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant'],
 max_concurrency=None
-    Args:max_number=None, max_duration=30.0, warmup_number=None, warmup_duration=None, cooldown_number=None, cooldown_duration=None
+    Args:max_number=None, max_duration=30.0, max_error=None, warmup_number=None, warmup_duration=None, cooldown_number=None, cooldown_duration=None
     Worker:type_='generative_requests_worker' backend_type='openai_http' backend_target='example_target' backend_model='example_model' backend_info={'max_output_tokens': 16384,
 'timeout': 300, 'http2': True, 'authorization': False, 'organization': None, 'project': None, 'text_completions_path': '/v1/completions', 'chat_completions_path':
 '/v1/chat/completions'}
@@ -18,7 +18,7 @@ Benchmarks Info:
 Metadata |||| Requests Made ||| Prompt Tok/Req ||| Output Tok/Req ||| Prompt Tok Total||| Output Tok Total ||
 Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err
 -----------|-----------|---------|-------------|------|-----|-----|------|------|----|-------|-----|-----|-------|-----|-----|-------|------|------
-synchronous| 16:59:28| 16:59:58| 30.0| 46| 1| 0| 257.1| 256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| 0| 5888| 0| 0
+synchronous| 20:59:28| 20:59:58| 30.0| 46| 1| 0| 257.1| 256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| 0| 5888| 0| 0
 ===================================================================================================================================================

tests/unit/preprocess/test_dataset.py

Lines changed: 0 additions & 2 deletions
@@ -133,8 +133,6 @@ def test_process_dataset_non_empty(
     mock_save_to_file,
     tokenizer_mock,
 ):
-    from guidellm.preprocess.dataset import process_dataset
-
     mock_dataset = [{"prompt": "Hello"}, {"prompt": "How are you?"}]
     mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
     mock_check_processor.return_value = tokenizer_mock
