File tree Expand file tree Collapse file tree 1 file changed +45
-0
lines changed
workload/profiles/inference-perf Expand file tree Collapse file tree 1 file changed +45
-0
---
# inference-perf benchmark profile: ramped concurrency sweep against a vLLM
# completion endpoint with synthetic random prompts.
load:
  type: concurrent
  stages:
    # Stage 1: 1 concurrent user, 32 total requests
    - concurrency_level: 1
      num_requests: 32
    # Stage 2: 2 concurrent users, 64 total requests
    - concurrency_level: 2
      num_requests: 64
    # Stage 3: 4 concurrent users, 128 total requests
    - concurrency_level: 4
      num_requests: 128
    # Stage 4: 8 concurrent users, 256 total requests
    - concurrency_level: 8
      num_requests: 256
api:
  type: completion
  streaming: true
server:
  type: vllm
  # REPLACE_ENV_* placeholders are substituted by the llm-d-bench harness
  # before this profile is consumed.
  model_name: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
  base_url: REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL
  ignore_eos: true
tokenizer:
  pretrained_model_name_or_path: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
data:
  type: random
  input_distribution:
    min: 5000  # min length of the synthetic prompts
    max: 10000  # max length of the synthetic prompts
    # NOTE(review): mean equals max, so roughly half of the sampled lengths
    # will be clipped to max — confirm this skew toward 10000 is intended.
    mean: 10000  # mean length of the synthetic prompts
    std: 500  # standard deviation of the length of the synthetic prompts
  output_distribution:
    min: 500  # min length of the output to be generated
    max: 1000  # max length of the output to be generated
    # NOTE(review): mean equals max here as well — confirm intended.
    mean: 1000  # mean length of the output to be generated
    std: 100  # standard deviation of the length of the output to be generated
report:
  request_lifecycle:
    summary: true
    per_stage: true
    per_request: true
storage:
  local_storage:
    path: /workspace
You can’t perform that action at this time.
0 commit comments