Skip to content

Commit fafea2d

Browse files
authored
Merge pull request #6 from neuralmagic/update_guidellm
Update guidellm
2 parents 7c46c7d + 0f60fce commit fafea2d

20 files changed

+257
-28
lines changed

examples/guidellm_example.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
GUIDELLM__MAX_CONCURRENCY=256,
1010
GUIDELLM__REQUEST_TIMEOUT=21600,
1111
target="http://localhost:8000/v1",
12-
data_type="emulated",
1312
max_seconds=30,
14-
data="prompt_tokens=512,generated_tokens=256",
13+
#scenario = "benchmarking_32k",
14+
data="prompt_tokens=128,output_tokens=128",
1515
vllm_kwargs={"enable-chunked-prefill": True}
1616
)
1717

1818
task.execute_remotely("oneshot-a100x1")
19-
#task.execute_locally()
19+
#task.execute_locally()

examples/lmeval_example.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
model_id="meta-llama/Llama-3.2-1B-Instruct",
77
tasks="gsm8k",
88
model_args="dtype=auto,max_model_len=8192",
9-
batch_size="auto",
9+
batch_size="auto",
1010
)
1111

1212
task.execute_remotely("oneshot-a100x1")
13-
#task.execute_locally()
13+
#task.execute_locally()
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 128000,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 128000,
8+
"output_tokens": 2048,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 2048
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 16000,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 16000,
8+
"output_tokens": 2048,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 2048
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 32000,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 32000,
8+
"output_tokens": 2048,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 2048
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 64000,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 64000,
8+
"output_tokens": 2048,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 2048
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 512,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 512,
8+
"output_tokens": 256,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 256
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 256,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 256,
8+
"output_tokens": 1024,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 1024
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 1024,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 1024,
8+
"output_tokens": 1024,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 1024
12+
}
13+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"rate_type": "sweep",
3+
"data": {
4+
"prompt_tokens": 768,
5+
"prompt_tokens_stdev": 128,
6+
"prompt_tokens_min": 1,
7+
"prompt_tokens_max": 768,
8+
"output_tokens": 128,
9+
"output_tokens_stdev": 64,
10+
"output_tokens_min": 1,
11+
"output_tokens_max": 128
12+
}
13+
}

0 commit comments

Comments
 (0)