
Commit 2afbd0c

feat: e2e tests
1 parent 3c6819f commit 2afbd0c

15 files changed: +851, -287 lines

.gitignore

Lines changed: 3 additions & 0 deletions
```diff
@@ -230,3 +230,6 @@ src/ui/next-env.d.ts
 !src/ui/public/manifest.json
 !src/ui/serve.json
 .eslintcache
+
+# vllm-sim
+bin/
```

tests/e2e/README.md

Lines changed: 12 additions & 0 deletions
# E2E tests

The E2E tests in GuideLLM use the [vLLM simulator by llm-d](https://llm-d.ai/docs/architecture/Components/inf-simulator). To run them, first build the simulator binary:

```shell
docker build . -f tests/e2e/vllm-sim.Dockerfile -o type=local,dest=./
```

Then run the tests:

```shell
tox -e test-e2e
```
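Note: the `docker build ... -o type=local,dest=./` step exports the simulator binary into `bin/`, which is why this commit also adds `bin/` to `.gitignore`. As a local convenience only, a hypothetical `tests/e2e/conftest.py` guard could skip the suite when that binary has not been built; the binary name and environment variable below are assumptions, not part of this commit:

```python
# Hypothetical sketch: skip the E2E tests when the simulator binary is absent.
# The default path "bin/llm-d-inference-sim" and the VLLM_SIM_BIN variable are
# assumptions; adjust to whatever the vllm-sim.Dockerfile actually exports.
import os
from pathlib import Path

import pytest

SIM_BINARY = Path(os.environ.get("VLLM_SIM_BIN", "bin/llm-d-inference-sim"))


def pytest_collection_modifyitems(config, items):
    """Mark every collected E2E test as skipped if the simulator binary is missing."""
    if SIM_BINARY.exists():
        return
    skip = pytest.mark.skip(reason=f"vLLM simulator binary not found at {SIM_BINARY}")
    for item in items:
        item.add_marker(skip)
```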
Lines changed: 72 additions & 0 deletions
```python
# E2E test for max error rate constraint functionality

from pathlib import Path

import pytest

from tests.e2e.utils import (
    GuidellmClient,
    assert_constraint_triggered,
    assert_no_python_exceptions,
    cleanup_report_file,
    load_benchmark_report,
)
from tests.e2e.vllm_sim_server import VllmSimServer


@pytest.fixture(scope="module")
def server():
    """
    Pytest fixture to start and stop the vLLM simulator server for the
    entire module using the VllmSimServer class.
    """
    server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo")
    try:
        server.start()
        yield server  # Yield the server for tests to use
    finally:
        server.stop()  # Teardown: stop the server after tests are done


@pytest.mark.timeout(30)
def test_max_error_benchmark(server: VllmSimServer):
    """
    Test that the max error rate constraint is triggered when the server goes down.
    """
    report_path = Path("tests/e2e/max_error_benchmarks.json")
    rate = 10
    max_error_rate = 0.1

    # Create and configure the guidellm client
    client = GuidellmClient(target=server.get_url(), output_path=report_path)

    try:
        # Start the benchmark
        client.start_benchmark(
            rate=rate,
            max_seconds=25,
            max_error_rate=max_error_rate,
        )

        # Wait for the benchmark to complete (the server is stopped after 10 seconds)
        client.wait_for_completion(timeout=30, stop_server_after=10, server=server)

        # Assert no Python exceptions occurred
        assert_no_python_exceptions(client.stderr)

        # Load and validate the report
        report = load_benchmark_report(report_path)
        benchmark = report["benchmarks"][0]

        # Check that the max error rate constraint was triggered
        assert_constraint_triggered(
            benchmark,
            "max_error_rate",
            {
                "exceeded_error_rate": True,
                "current_error_rate": lambda rate: rate >= max_error_rate,
            },
        )
    finally:
        cleanup_report_file(report_path)
```
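The fixture above relies on `tests/e2e/vllm_sim_server.py`, which this excerpt does not show. Below is a minimal sketch of what such a wrapper might look like, inferred only from how the tests construct and use it (`start()`, `stop()`, `get_url()`); the binary path, CLI flag names, and readiness endpoint are assumptions:

```python
# Hypothetical sketch of tests/e2e/vllm_sim_server.py, reconstructed from usage only.
# The simulator binary path, its flag names, and the /v1/models readiness check are
# assumptions, not taken from this commit.
import subprocess
import time
import urllib.request


class VllmSimServer:
    def __init__(self, port, model, mode, time_to_first_token=None, inter_token_latency=None):
        self.port = port
        self.model = model
        self.mode = mode
        self.time_to_first_token = time_to_first_token
        self.inter_token_latency = inter_token_latency
        self.process = None

    def get_url(self) -> str:
        return f"http://localhost:{self.port}"

    def start(self, startup_timeout: float = 10.0) -> None:
        """Spawn the simulator and block until it answers HTTP requests."""
        cmd = [
            "bin/llm-d-inference-sim",  # assumed binary name/location
            "--port", str(self.port),
            "--model", self.model,
            "--mode", self.mode,
        ]
        if self.time_to_first_token is not None:
            cmd += ["--time-to-first-token", str(self.time_to_first_token)]
        if self.inter_token_latency is not None:
            cmd += ["--inter-token-latency", str(self.inter_token_latency)]
        self.process = subprocess.Popen(cmd)

        # Poll an assumed OpenAI-compatible endpoint until the server responds.
        deadline = time.monotonic() + startup_timeout
        while time.monotonic() < deadline:
            try:
                urllib.request.urlopen(f"{self.get_url()}/v1/models", timeout=1)
                return
            except OSError:
                time.sleep(0.2)
        raise RuntimeError("simulator did not become ready in time")

    def stop(self) -> None:
        """Terminate the simulator process if it is still running."""
        if self.process is not None and self.process.poll() is None:
            self.process.terminate()
            self.process.wait(timeout=10)
```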

tests/e2e/test_placeholder.py

Lines changed: 0 additions & 6 deletions
This file was deleted.
Lines changed: 120 additions & 0 deletions
```python
# E2E tests for successful benchmark scenarios with timing validation

from pathlib import Path

import pytest

from tests.e2e.utils import (
    GuidellmClient,
    assert_constraint_triggered,
    assert_no_python_exceptions,
    assert_successful_requests_fields,
    cleanup_report_file,
    load_benchmark_report,
)
from tests.e2e.vllm_sim_server import VllmSimServer


@pytest.fixture(scope="module")
def server():
    """
    Pytest fixture to start and stop the vLLM simulator server for the
    entire module using the VllmSimServer class.
    """
    server = VllmSimServer(
        port=8000,
        model="databricks/dolly-v2-12b",
        mode="echo",
        time_to_first_token=1,  # 1ms TTFT
        inter_token_latency=1,  # 1ms ITL
    )
    try:
        server.start()
        yield server  # Yield the server for tests to use
    finally:
        server.stop()  # Teardown: stop the server after tests are done


@pytest.mark.timeout(30)
def test_max_seconds_benchmark(server: VllmSimServer):
    """
    Test that the max seconds constraint is properly triggered.
    """
    report_path = Path("tests/e2e/max_duration_benchmarks.json")
    rate = 10

    # Create and configure the guidellm client
    client = GuidellmClient(target=server.get_url(), output_path=report_path)

    try:
        # Start the benchmark
        client.start_benchmark(
            rate=rate,
            max_seconds=1,
        )

        # Wait for the benchmark to complete
        client.wait_for_completion(timeout=30)

        # Assert no Python exceptions occurred
        assert_no_python_exceptions(client.stderr)

        # Load and validate the report
        report = load_benchmark_report(report_path)
        benchmark = report["benchmarks"][0]

        # Check that the max duration constraint was triggered
        assert_constraint_triggered(
            benchmark, "max_seconds", {"duration_exceeded": True}
        )

        # Validate successful requests have all expected fields
        successful_requests = benchmark["requests"]["successful"]
        assert_successful_requests_fields(successful_requests)
    finally:
        cleanup_report_file(report_path)


@pytest.mark.timeout(30)
def test_max_requests_benchmark(server: VllmSimServer):
    """
    Test that the max requests constraint is properly triggered.
    """
    report_path = Path("tests/e2e/max_number_benchmarks.json")
    rate = 10

    # Create and configure the guidellm client
    client = GuidellmClient(target=server.get_url(), output_path=report_path)

    try:
        # Start the benchmark
        client.start_benchmark(
            rate=rate,
            max_requests=rate,
        )

        # Wait for the benchmark to complete
        client.wait_for_completion(timeout=30)

        # Assert no Python exceptions occurred
        assert_no_python_exceptions(client.stderr)

        # Load and validate the report
        report = load_benchmark_report(report_path)
        benchmark = report["benchmarks"][0]

        # Check that the max requests constraint was triggered
        assert_constraint_triggered(
            benchmark, "max_requests", {"processed_exceeded": True}
        )

        # Validate successful requests have all expected fields
        successful_requests = benchmark["requests"]["successful"]
        assert len(successful_requests) == rate, (
            f"Expected {rate} successful requests, got {len(successful_requests)}"
        )
        assert_successful_requests_fields(successful_requests)
    finally:
        cleanup_report_file(report_path)
```
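The helpers imported from `tests.e2e.utils` (for example `assert_no_python_exceptions` and `assert_constraint_triggered`) are likewise not shown in this excerpt. Here is a rough sketch based solely on their call sites in these tests; the `"constraints"` key and its layout inside the benchmark dict are assumptions about the report schema:

```python
# Hypothetical sketch of helpers in tests/e2e/utils.py, inferred from call sites only.
# The "constraints" key and its layout inside the benchmark dict are assumptions.


def assert_no_python_exceptions(stderr: str) -> None:
    """Fail if the guidellm process wrote a Python traceback to stderr."""
    assert "Traceback (most recent call last)" not in stderr, (
        f"guidellm raised an exception:\n{stderr}"
    )


def assert_constraint_triggered(benchmark: dict, name: str, expected: dict) -> None:
    """Check that a named stop constraint fired and that its metadata matches.

    Values in ``expected`` may be plain values (compared with ==) or callables
    (predicates applied to the recorded value), as in the max_error_rate test.
    """
    constraints = benchmark["constraints"]  # assumed location of trigger metadata
    assert name in constraints, f"constraint {name!r} was never triggered"
    actual = constraints[name]
    for key, want in expected.items():
        got = actual[key]
        if callable(want):
            assert want(got), f"{name}.{key}={got!r} failed predicate"
        else:
            assert got == want, f"{name}.{key}={got!r}, expected {want!r}"
```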
