Commit 0750c00

feat(tests): e2e

1 parent 0c6f679 · commit 0750c00

9 files changed: +381 additions, -0 deletions

tests/e2e/e2e/__init__.py

Whitespace-only changes.

tests/e2e/e2e/e2e/README.md

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
# E2E tests

The E2E tests in GuideLLM use the [vLLM simulator by llm-d](https://llm-d.ai/docs/architecture/Components/inf-simulator). To run them, first build the simulator binary with the following command:

```shell
docker build . -f tests/e2e/vllm-sim.Dockerfile -o type=local,dest=./
```
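
Once the build has exported the binary to `./bin/llm-d-inference-sim`, the suite can be run with pytest in the usual way, e.g. `pytest tests/e2e` (the exact invocation is an assumption based on the repo layout); if the binary is missing, the e2e modules skip themselves with a warning rather than fail.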

tests/e2e/e2e/e2e/__init__.py

Whitespace-only changes.
Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
# test_server_interaction.py

import json
import subprocess
import time
from pathlib import Path

import pytest
from loguru import logger

from tests.e2e.vllm_sim_server import VllmSimServer


@pytest.fixture(scope="module")
def server():
    """
    Pytest fixture that starts the vLLM simulator once for the module
    (via VllmSimServer) and stops it after all tests have run.
    """
    server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo")
    try:
        server.start()
        yield server  # Yield the server instance for tests to use
    finally:
        server.stop()  # Teardown: stop the server after tests are done


@pytest.mark.timeout(30)
def test_max_error_benchmark(server: VllmSimServer):
    """
    Start a constant-rate benchmark, then kill the server mid-run and
    verify that the run terminates with an error once the error rate
    exceeds --max-error.
    """
    report_path = Path("tests/e2e/max_error_benchmarks.json")
    rate = 10
    max_error_rate = 0.1
    command = f"""guidellm benchmark \
        --target "{server.get_url()}" \
        --rate-type constant \
        --rate {rate} \
        --max-seconds 60 \
        --max-error {max_error_rate} \
        --data "prompt_tokens=256,output_tokens=128" \
        --output-path {report_path}
    """
    logger.info(f"Client command: {command}")
    process = subprocess.Popen(  # noqa: S603
        ["/bin/bash", "-c", command],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    logger.info("Waiting for client to start...")
    time.sleep(10)
    server.stop()  # Kill the server mid-run so the remaining requests fail

    try:
        logger.info("Fetching client output")
        stdout, stderr = process.communicate()
        logger.debug(f"Client stdout:\n{stdout}")
        logger.debug(f"Client stderr:\n{stderr}")

        assert report_path.exists()
        with report_path.open("r") as f:
            report = json.load(f)

        assert "benchmarks" in report
        benchmarks = report["benchmarks"]
        assert len(benchmarks) > 0
        benchmark = benchmarks[0]
        assert "run_stats" in benchmark
        run_stats = benchmark["run_stats"]
        assert "status" in run_stats
        status = run_stats["status"]
        assert status == "error"
        assert "termination_reason" in run_stats
        termination_reason = run_stats["termination_reason"]
        assert termination_reason == "max_error_reached"
        assert "window_error_rate" in run_stats
        window_error_rate = run_stats["window_error_rate"]
        assert window_error_rate > max_error_rate
    finally:
        process.terminate()  # Send SIGTERM
        try:
            process.wait(timeout=5)  # Wait for the process to terminate
            logger.info("Client stopped successfully.")
        except subprocess.TimeoutExpired:
            logger.warning("Client did not terminate gracefully, killing it...")
            process.kill()  # Send SIGKILL if it doesn't terminate
            process.wait()

        if report_path.exists():
            report_path.unlink()
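
For reference, the assertions above imply a report of roughly this shape (a hedged sketch reconstructed from the keys the test checks; the values are illustrative placeholders, not output from a real run):

```python
# Hypothetical minimal report shape for the max-error case; only the keys
# asserted on above are shown, and the values here are made up.
report = {
    "benchmarks": [
        {
            "run_stats": {
                "status": "error",
                "termination_reason": "max_error_reached",
                "window_error_rate": 0.25,  # asserted to exceed max_error_rate (0.1)
            },
        },
    ],
}
```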

tests/e2e/e2e/e2e/test_placeholder.py

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
import pytest


@pytest.mark.smoke
def test_placeholder():
    assert True
Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
# test_server_interaction.py

import json
import os
from pathlib import Path

import pytest
from loguru import logger

from tests.e2e.vllm_sim_server import VllmSimServer


@pytest.fixture(scope="module")
def server():
    """
    Pytest fixture that starts the vLLM simulator once for the module
    (via VllmSimServer) and stops it after all tests have run.
    """
    server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo")
    try:
        server.start()
        yield server  # Yield the server instance for tests to use
    finally:
        server.stop()  # Teardown: stop the server after tests are done


@pytest.mark.timeout(30)
def test_max_seconds_benchmark(server: VllmSimServer):
    """
    Run a constant-rate benchmark bounded by --max-seconds and verify
    that the report records a successful run terminated by the time limit.
    """
    report_path = Path("tests/e2e/max_duration_benchmarks.json")
    rate = 10
    command = f"""
    guidellm benchmark \
        --target "{server.get_url()}" \
        --rate-type constant \
        --rate {rate} \
        --max-seconds 1 \
        --data "prompt_tokens=256,output_tokens=128" \
        --output-path {report_path}
    """

    logger.info(f"Client command: {command}")
    os.system(command)  # noqa: S605

    assert report_path.exists()
    with report_path.open("r") as f:
        report = json.load(f)

    assert "benchmarks" in report
    benchmarks = report["benchmarks"]
    assert len(benchmarks) > 0
    benchmark = benchmarks[0]
    assert "requests" in benchmark
    requests = benchmark["requests"]
    assert "successful" in requests
    successful = requests["successful"]
    assert len(successful) > rate

    assert "run_stats" in benchmark
    run_stats = benchmark["run_stats"]
    assert "status" in run_stats
    status = run_stats["status"]
    assert status == "success"
    assert "termination_reason" in run_stats
    termination_reason = run_stats["termination_reason"]
    assert termination_reason == "max_seconds_reached"

    if report_path.exists():
        report_path.unlink()


@pytest.mark.timeout(30)
def test_max_requests_benchmark(server: VllmSimServer):
    """
    Run a constant-rate benchmark bounded by --max-requests and verify
    that exactly that many requests succeed before the run terminates.
    """
    report_path = Path("tests/e2e/max_number_benchmarks.json")
    rate = 10
    command = f"""
    guidellm benchmark \
        --target "{server.get_url()}" \
        --rate-type constant \
        --rate {rate} \
        --max-requests {rate} \
        --data "prompt_tokens=256,output_tokens=128" \
        --output-path {report_path}
    """

    logger.info(f"Client command: {command}")
    os.system(command)  # noqa: S605

    assert report_path.exists()
    with report_path.open("r") as f:
        report = json.load(f)

    assert "benchmarks" in report
    benchmarks = report["benchmarks"]
    assert len(benchmarks) > 0
    benchmark = benchmarks[0]
    assert "requests" in benchmark
    requests = benchmark["requests"]
    assert "successful" in requests
    successful = requests["successful"]
    assert len(successful) == rate

    assert "run_stats" in benchmark
    run_stats = benchmark["run_stats"]
    assert "status" in run_stats
    status = run_stats["status"]
    assert status == "success"
    assert "termination_reason" in run_stats
    termination_reason = run_stats["termination_reason"]
    assert termination_reason == "max_requests_reached"

    if report_path.exists():
        report_path.unlink()
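
The success-path tests walk a slightly different shape, adding a `requests` section (again a hedged sketch built only from the asserted keys; the per-request entries are elided):

```python
# Hypothetical report shape for the max-seconds / max-requests cases.
# "successful" holds one entry per completed request; entries are elided.
report = {
    "benchmarks": [
        {
            "requests": {
                "successful": ["..."],  # length is compared against `rate`
            },
            "run_stats": {
                "status": "success",
                "termination_reason": "max_seconds_reached",  # or "max_requests_reached"
            },
        },
    ],
}
```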

tests/e2e/e2e/e2e/vllm-sim.Dockerfile

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
FROM golang AS base

WORKDIR /app

RUN apt-get update && \
    apt-get install -y libzmq3-dev pkg-config && \
    git clone https://github.com/llm-d/llm-d-inference-sim.git && \
    cd llm-d-inference-sim && \
    make build

WORKDIR /app/llm-d-inference-sim

FROM scratch
COPY --from=base /app/llm-d-inference-sim/bin/llm-d-inference-sim /bin/llm-d-inference-sim
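
Because the final stage is `FROM scratch` and contains only the compiled binary, building with `-o type=local,dest=./` (the README command) writes that stage's filesystem to the host instead of producing an image, leaving the simulator at `./bin/llm-d-inference-sim`, which is exactly where `VllmSimServer` below expects to find it.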

tests/e2e/e2e/e2e/vllm_sim_server.py

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
import subprocess
import time
from pathlib import Path
from typing import Optional

import pytest
import requests
from loguru import logger


class VllmSimServer:
    """
    A pytest wrapper around the
    [vLLM simulator](https://llm-d.ai/docs/architecture/Components/inf-simulator)
    binary from llm-d.
    """

    def __init__(
        self,
        port: int,
        model: str,
        lora: Optional[list[str]] = None,
        mode: Optional[str] = None,
        echo: Optional[bool] = None,
        random: Optional[bool] = None,
        time_to_first_token: Optional[float] = None,
        inter_token_latency: Optional[float] = None,
        max_loras: Optional[int] = None,
        max_cpu_loras: Optional[int] = None,
        max_running_requests: Optional[int] = None,
    ):
        self.port = port
        self.model = model
        self.lora = lora
        self.mode = mode
        self.echo = echo
        self.random = random
        self.time_to_first_token = time_to_first_token
        self.inter_token_latency = inter_token_latency
        self.max_loras = max_loras
        self.max_cpu_loras = max_cpu_loras
        self.max_running_requests = max_running_requests
        self.server_url = f"http://127.0.0.1:{self.port}"
        self.health_url = f"{self.server_url}/health"
        self.app_script = "./bin/llm-d-inference-sim"
        self.process: Optional[subprocess.Popen] = None
        if not Path(self.app_script).exists():
            message = (
                "The vLLM simulator binary is required for E2E tests, but is missing.\n"
                "To build it and enable E2E tests, please run:\n"
                "docker build . -f tests/e2e/vllm-sim.Dockerfile -o type=local,dest=./"
            )
            logger.warning(message)
            pytest.skip("vLLM simulator binary missing", allow_module_level=True)

    def get_cli_parameters(self) -> list[str]:
        """
        Builds the simulator CLI parameter list from the configured options.
        """
        parameters = ["--port", f"{self.port}", "--model", self.model]
        if self.lora is not None:
            parameters.extend(["--lora", ",".join(self.lora)])
        if self.mode is not None:
            parameters.extend(["--mode", self.mode])
        if self.echo:  # Boolean flag: only added when truthy
            parameters.extend(["--echo"])
        if self.random:  # Boolean flag: only added when truthy
            parameters.extend(["--random"])
        if self.time_to_first_token is not None:
            parameters.extend(["--time-to-first-token", f"{self.time_to_first_token}"])
        if self.inter_token_latency is not None:
            parameters.extend(["--inter-token-latency", f"{self.inter_token_latency}"])
        if self.max_loras is not None:
            parameters.extend(["--max-loras", f"{self.max_loras}"])
        if self.max_cpu_loras is not None:
            parameters.extend(["--max-cpu-loras", f"{self.max_cpu_loras}"])
        if self.max_running_requests is not None:
            parameters.extend(
                ["--max-running-requests", f"{self.max_running_requests}"]
            )
        return parameters

    def start(self):
        """
        Starts the server process and waits for it to become healthy.
        """
        logger.info(f"Starting server on {self.server_url} using {self.app_script}...")
        cli_parameters = self.get_cli_parameters()
        command = " ".join([self.app_script, *cli_parameters])
        logger.info(f"Server command: {command}")
        self.process = subprocess.Popen(  # noqa: S603
            [self.app_script, *cli_parameters],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,  # Decode stdout/stderr as text
        )

        # Wait for the server to start and become healthy
        max_retries = 20
        retry_delay_sec = 0.5
        for i in range(max_retries):
            try:
                response = requests.get(self.health_url, timeout=1)
                if response.status_code == 200:
                    logger.info(f"Server started successfully at {self.server_url}")
                    return
                else:
                    logger.warning(f"Got response with status: {response.status_code}")
                    logger.warning(response.json())
            except requests.ConnectionError:
                logger.warning(f"Waiting for server... (attempt {i + 1}/{max_retries})")
            time.sleep(retry_delay_sec)
        # If the loop completes without returning, the server didn't start
        stdout, stderr = self.process.communicate()
        logger.error(f"Server failed to start after {max_retries} retries.")
        logger.error(f"Server stdout:\n{stdout}")
        logger.error(f"Server stderr:\n{stderr}")
        self.stop()  # Attempt to clean up
        pytest.fail("Server did not start within the expected time.")

    def stop(self):
        """
        Stops the server process.
        """
        if self.process:
            logger.info(f"Stopping server on {self.server_url}...")
            self.process.terminate()  # Send SIGTERM
            try:
                self.process.wait(timeout=1)  # Wait for the process to terminate
                logger.info("Server stopped successfully.")
            except subprocess.TimeoutExpired:
                logger.warning("Server did not terminate gracefully, killing it...")
                self.process.kill()  # Send SIGKILL if it doesn't terminate
                self.process.wait()
            self.process = None  # Clear the process reference

    def get_url(self):
        """
        Returns the base URL of the running server.
        """
        return self.server_url
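
For orientation, here is a minimal sketch of driving the wrapper by hand from a test module (it assumes the binary has already been built to `./bin/llm-d-inference-sim` per the README; the constructor skips the module otherwise):

```python
# Minimal usage sketch for VllmSimServer; assumes the simulator binary
# exists at ./bin/llm-d-inference-sim (see the README build step).
from tests.e2e.vllm_sim_server import VllmSimServer

server = VllmSimServer(port=8000, model="databricks/dolly-v2-12b", mode="echo")
try:
    server.start()           # Blocks until /health returns 200, else fails
    print(server.get_url())  # -> http://127.0.0.1:8000
finally:
    server.stop()            # SIGTERM, then SIGKILL after 1s if needed
```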

tests/e2e/e2e/test_placeholder.py

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
import pytest


@pytest.mark.smoke
def test_placeholder():
    assert True
