Commit 9af81f7

Merge pull request #2 from neuralmagic/bench-m
Add basic config for benchmark

2 parents: 582823e + 1d0b9f9

File tree: 5 files changed, +29 −4 lines changed

README.md — 6 additions & 0 deletions

```diff
@@ -8,3 +8,9 @@ The `accuracy` folder contains YAML files for each model that configures informa
 * client.yml: contains settings for the llm-eval-test harness for the model
 * accuracy.yml: contains evaluation tasks and accuracy expectations for the model
 * storage.yml: specifies where mode and dataset is located
+
+The `benchmark` folder contains YAML files for each model that configures information needed for the model to be validated through the [guidellm](https://github.com/neuralmagic/guidellm). There are config files for each model:
+
+* server.yml: contains settings to start a vllm server with the model
+* client.yml: contains settings for the guidellm for the model
+* storage.yml: specifies where mode and dataset is located
```
Lines changed: 4 additions & 4 deletions

```diff
@@ -1,6 +1,6 @@
 # server configs for https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
 model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-trust_remote_code: true
-enable_chunked_prefill: true
-tensor_parallel_size:
-max_model_len: 4096
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size:
+max-model-len: 4096
```
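The only change in this hunk is renaming the keys from snake_case to kebab-case. A plausible reading (an assumption, not stated in the commit) is that the kebab-case names line up with the server's CLI flag spellings, so a config mapping can be turned into command-line arguments mechanically. A minimal sketch, with `config_to_cli_args` as a hypothetical helper:

```python
def config_to_cli_args(config: dict) -> list[str]:
    """Turn a flat server-config mapping into CLI-style arguments.

    Hypothetical helper, not part of the repo: `model` is treated as a
    positional argument, booleans become bare flags, and unset keys
    (like `tensor-parallel-size:` above) are skipped.
    """
    args = []
    for key, value in config.items():
        if key == "model":
            continue  # positional argument, handled separately
        flag = "--" + key.replace("_", "-")
        if value is True:
            args.append(flag)  # boolean flags take no value
        elif value is None:
            continue  # key present in YAML but left unset
        else:
            args.extend([flag, str(value)])
    return args

server_cfg = {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "trust-remote-code": True,
    "enable-chunked-prefill": True,
    "tensor-parallel-size": None,
    "max-model-len": 4096,
}
print(config_to_cli_args(server_cfg))
# → ['--trust-remote-code', '--enable-chunked-prefill', '--max-model-len', '4096']
```

With the old snake_case keys, the same translation would have produced flags the server does not recognize, which may be why the commit renames them.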
Lines changed: 8 additions & 0 deletions

```diff
@@ -0,0 +1,8 @@
+target: "http://localhost:8000/v1"
+model: "meta-llama/Llama-3.1-8B-Instruct"
+data:
+  prompt_tokens: 64
+  output_tokens: 16
+rate-type: throughput
+max-seconds: 400
+output_path: ""
```
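This client config describes a benchmark run against the local server: a target endpoint, a synthetic-data spec, and a rate/duration. As a rough illustration only — the flag names below are assumptions, not taken from the guidellm documentation — such a mapping could be rendered into a command line with a small hypothetical helper:

```python
import shlex

def client_config_to_command(cfg: dict) -> str:
    """Render a client-config mapping as a benchmark command line.

    Sketch only: `guidellm benchmark` and every flag name here are
    assumed spellings, not verified against the guidellm CLI.
    """
    parts = ["guidellm", "benchmark",
             "--target", cfg["target"],
             "--model", cfg["model"]]
    data = cfg.get("data", {})
    if data:
        # flatten the nested data spec into inline key=value pairs
        parts += ["--data", ",".join(f"{k}={v}" for k, v in data.items())]
    for key in ("rate-type", "max-seconds"):
        if key in cfg:
            parts += [f"--{key}", str(cfg[key])]
    return shlex.join(parts)

cfg = {
    "target": "http://localhost:8000/v1",
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "data": {"prompt_tokens": 64, "output_tokens": 16},
    "rate-type": "throughput",
    "max-seconds": 400,
}
print(client_config_to_command(cfg))
```

Keeping these values in YAML rather than in a script means the same harness can run every model's benchmark by swapping config files.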
Lines changed: 8 additions & 0 deletions

```diff
@@ -0,0 +1,8 @@
+# server configs for https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
+# config.yaml
+model: meta-llama/Llama-3.1-8B-Instruct
+uvicorn-log-level: "debug"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
```
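All of the new files share the same flat `key: value` shape, so any consumer can read them uniformly. A minimal sketch of such a reader — a real harness would use PyYAML's `safe_load` instead of this hand-rolled subset:

```python
def parse_flat_yaml(text: str) -> dict:
    """Read a flat `key: value` config like the files in this commit.

    Sketch only: handles comments, quoted strings, bare `true`, and
    unset keys; numeric values stay as strings.
    """
    cfg = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # skip blanks and comment lines
        key, _, value = line.partition(":")
        value = value.strip().strip('"')
        if value == "true":
            cfg[key.strip()] = True
        else:
            cfg[key.strip()] = value or None  # empty value -> unset
    return cfg

server_yaml = """\
# server configs for https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
model: meta-llama/Llama-3.1-8B-Instruct
uvicorn-log-level: "debug"
trust-remote-code: true
enable-chunked-prefill: true
tensor-parallel-size: 1
max-model-len: 4096
"""
print(parse_flat_yaml(server_yaml)["trust-remote-code"])
# → True
```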
Lines changed: 3 additions & 0 deletions

```diff
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
+model: hf
+data: hf
```