Skip to content

Commit 2efad69

Browse files
authored
add random concurrent workload template for inference-perf (#635)
* add random concurrent workload template for inference-perf * Rename sanity_random_concurrent.yaml.in to random_concurrent.yaml.in Signed-off-by: Xia Hua <huaxi@google.com> --------- Signed-off-by: Xia Hua <huaxi@google.com>
1 parent 598f11f commit 2efad69

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
# Random concurrent workload template for inference-perf.
#
# Runs four load stages at increasing concurrency against a vLLM server,
# using randomly generated prompts. The REPLACE_ENV_* tokens are
# placeholders; presumably they are substituted by the benchmark harness
# before this file is consumed -- confirm against the rendering step.
# NOTE(review): if a substituted value could start with a YAML indicator or
# look like a number/boolean, consider quoting the placeholders.

load:
  type: concurrent
  stages:
    # Stage 1: 1 concurrent user, 32 total requests
    - concurrency_level: 1
      num_requests: 32
    # Stage 2: 2 concurrent users, 64 total requests
    - concurrency_level: 2
      num_requests: 64
    # Stage 3: 4 concurrent users, 128 total requests
    - concurrency_level: 4
      num_requests: 128
    # Stage 4: 8 concurrent users, 256 total requests
    - concurrency_level: 8
      num_requests: 256

api:
  type: completion
  streaming: true

server:
  type: vllm
  model_name: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
  base_url: REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL
  ignore_eos: true

tokenizer:
  pretrained_model_name_or_path: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL

data:
  type: random
  # NOTE(review): in both distributions below, mean equals max. If lengths
  # are sampled around the mean and clamped to [min, max], a large share of
  # samples would land exactly on max -- confirm this is intended.
  input_distribution:
    min: 5000  # min length of the synthetic prompts
    max: 10000  # max length of the synthetic prompts
    mean: 10000  # mean length of the synthetic prompts
    std: 500  # standard deviation of the length of the synthetic prompts
  output_distribution:
    min: 500  # min length of the output to be generated
    max: 1000  # max length of the output to be generated
    mean: 1000  # mean length of the output to be generated
    std: 100  # standard deviation of the length of the output to be generated

report:
  request_lifecycle:
    summary: true
    per_stage: true
    per_request: true

storage:
  local_storage:
    path: /workspace

0 commit comments

Comments
 (0)