fix: make e2e tests work as expected

AlonKellner-Jounce · AlonKellner-Jounce · commit f1d9db9b56b4 · 2025-07-22T06:58:24.000Z
diff --git a/tests/e2e/test_interrupted.py b/tests/e2e/test_interrupted.py
@@ -39,7 +39,7 @@ def test_interrupted_report(server: VllmSimServer):
   --target "{server.get_url()}" \
   --rate-type constant \
   --rate {rate} \
-  --max-seconds 1000 \
+  --max-seconds 60 \
   --max-error {max_error_rate} \
   --data "prompt_tokens=256,output_tokens=128" \
   --output-path {report_path}
@@ -52,37 +52,37 @@ def test_interrupted_report(server: VllmSimServer):
     logger.info("Waiting for client to start...")
     time.sleep(5)
     server.stop()
-    logger.info("Waiting for client to stop...")
-    time.sleep(5)
-
-    logger.info("Fetching client output")
-    stdout, stderr = process.communicate()
-    logger.info(f"Client stdout:\n{stdout}")
-    logger.info(f"Client stderr:\n{stderr}")
 
-    assert report_path.exists()
-    with report_path.open("r") as f:
-        report = json.load(f)
+    try:
+        logger.info("Fetching client output")
+        stdout, stderr = process.communicate()
+        logger.info(f"Client stdout:\n{stdout}")
+        logger.info(f"Client stderr:\n{stderr}")
 
-    assert "benchmarks" in report
-    benchmarks = report["benchmarks"]
-    assert len(benchmarks) > 0
-    benchmark = benchmarks[0]
-    assert "requests" in benchmark
-    requests = benchmark["requests"]
-    assert "successful" in requests
-    successful = requests["successful"]
-    assert "errored" in requests
-    errored = requests["errored"]
-    assert len(errored) / (len(successful) + len(errored)) > max_error_rate
+        assert report_path.exists()
+        with report_path.open("r") as f:
+            report = json.load(f)
 
-    report_path.unlink()
+        assert "benchmarks" in report
+        benchmarks = report["benchmarks"]
+        assert len(benchmarks) > 0
+        benchmark = benchmarks[0]
+        assert "requests" in benchmark
+        requests = benchmark["requests"]
+        assert "successful" in requests
+        successful = requests["successful"]
+        assert "errored" in requests
+        errored = requests["errored"]
+        assert len(errored) / (len(successful) + len(errored)) > max_error_rate
+    finally:
+        if report_path.exists():
+            report_path.unlink()
 
-    process.terminate()  # Send SIGTERM
-    try:
-        process.wait(timeout=5)  # Wait for the process to terminate
-        logger.info("Client stopped successfully.")
-    except subprocess.TimeoutExpired:
-        logger.warning("Client did not terminate gracefully, killing it...")
-        process.kill()  # Send SIGKILL if it doesn't terminate
-        process.wait()
+        process.terminate()  # Send SIGTERM
+        try:
+            process.wait(timeout=5)  # Wait for the process to terminate
+            logger.info("Client stopped successfully.")
+        except subprocess.TimeoutExpired:
+            logger.warning("Client did not terminate gracefully, killing it...")
+            process.kill()  # Send SIGKILL if it doesn't terminate
+            process.wait()
diff --git a/tests/e2e/vllm_sim_server.py b/tests/e2e/vllm_sim_server.py
@@ -115,7 +115,7 @@ def stop(self):
             logger.info(f"Stopping server on {self.server_url}...")
             self.process.terminate()  # Send SIGTERM
             try:
-                self.process.wait(timeout=5)  # Wait for the process to terminate
+                self.process.wait(timeout=1)  # Wait for the process to terminate
                 logger.info("Server stopped successfully.")
             except subprocess.TimeoutExpired:
                 logger.warning("Server did not terminate gracefully, killing it...")