Add more model configs (#13)

dhuangnm · web-flow · commit c956c95ed05b · 2025-04-17T13:12:59.000-04:00
* add model configs for summit models

* add more models for the summit

* add more models

* add more models

* clean up

* set values for num_fewshot and tensor-parallel-size

* update client.yml

* pull Derek's PRs

* fix model path

* add and update model configs

* fix typo and set tensor-parallel-size to 1 for base config

* fix a typo and add 3 more models added this morning
diff --git a/Qwen/Qwen2.5-7B-Instruct/accuracy/client.yml b/Qwen/Qwen2.5-7B-Instruct/accuracy/client.yml
@@ -0,0 +1,3 @@
+# llm-eval-test configs for https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
+model: "Qwen/Qwen2.5-7B-Instruct"
+chat_template: true
diff --git a/Qwen/Qwen2.5-7B-Instruct/accuracy/server.yml b/Qwen/Qwen2.5-7B-Instruct/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
+model: "Qwen/Qwen2.5-7B-Instruct"
+trust-remote-code: true
+add-bos-token: false
+tensor-parallel-size: 1
+max-model-len: 8192
diff --git a/Qwen/Qwen2.5-7B-Instruct/storage.yml b/Qwen/Qwen2.5-7B-Instruct/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
+model: hf
+data: hf
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic/accuracy/client.yml b/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic
+model: "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic/accuracy/server.yml b/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic
+model: "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic/storage.yml b/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16/accuracy/client.yml b/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,5 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16
+model: "RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16"
+chat_template: true
+fewshot_as_multiturn: true
+num_fewshot: 5
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16/accuracy/server.yml b/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16
+model: "RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16/storage.yml b/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8/accuracy/client.yml b/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8
+model: "RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8/accuracy/server.yml b/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8
+model: "RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 8192
diff --git a/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8/storage.yml b/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8
+model: hf
+data: hf
diff --git a/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/client.yml b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/client.yml
@@ -0,0 +1,3 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic
+model: "RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic"
+chat_template: true
diff --git a/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/server.yml b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic
+model: "RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/storage.yml b/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/accuracy/client.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic
+model: "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/accuracy/server.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic
+model: "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/storage.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16/accuracy/client.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16
+model: "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16/accuracy/server.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16
+model: "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 8192
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16/storage.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8/accuracy/client.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
+model: "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8/accuracy/server.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
+model: "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 8192
diff --git a/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8/storage.yml b/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w8a8
+model: hf
+data: hf
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/accuracy/client.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic
+model: "RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/accuracy/server.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic
+model: "RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/storage.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-FP8-Dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16/accuracy/client.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16
+model: "RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16/accuracy/server.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16
+model: "RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16/storage.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/accuracy/client.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/accuracy/client.yml
@@ -0,0 +1,3 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8
+model: "RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8"
+chat_template: true
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/accuracy/server.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8
+model: "RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/storage.yml b/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Mistral-Small-24B-Instruct-2501-quantized.w8a8
+model: hf
+data: hf
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/accuracy/client.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/accuracy/client.yml
@@ -0,0 +1,5 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic
+model: "RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"
+chat_template: true
+fewshot_as_multiturn: true
+num_fewshot: 5
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/accuracy/server.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic
+model: "RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/storage.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16/accuracy/client.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,5 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16
+model: "RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16"
+chat_template: true
+fewshot_as_multiturn: true
+num_fewshot: 5
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16/accuracy/server.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16
+model: "RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16/storage.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/accuracy/client.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/accuracy/client.yml
@@ -0,0 +1,5 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8
+model: "RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8"
+chat_template: true
+fewshot_as_multiturn: true
+num_fewshot: 5
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/accuracy/server.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8
+model: "RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/storage.yml b/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-quantized.w8a8
+model: hf
+data: hf
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/client.yml b/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic
+model: "RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/server.yml b/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,5 @@
+# server configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic
+model: "RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic"
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 8192
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/storage.yml b/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-FP8-dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/client.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16
+model: "RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/server.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,5 @@
+# server configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16
+model: "RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16"
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 8192
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/storage.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/client.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8
+model: "RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/server.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/accuracy/server.yml
@@ -0,0 +1,5 @@
+# server configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8
+model: "RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8"
+trust-remote-code: true
+tensor-parallel-size: 1
+max-model-len: 8192
diff --git a/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/storage.yml b/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-Instruct-quantized.w8a8
+model: hf
+data: hf
diff --git a/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/client.yml b/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,3 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-quantized.w4a16
+model: "RedHatAI/Qwen2.5-7B-quantized.w4a16"
+chat_template: true
diff --git a/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/server.yml b/RedHatAI/Qwen2.5-7B-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-quantized.w4a16
+model: "RedHatAI/Qwen2.5-7B-quantized.w4a16"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/Qwen2.5-7B-quantized.w4a16/storage.yml b/RedHatAI/Qwen2.5-7B-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/Qwen2.5-7B-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic/accuracy/client.yml b/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic
+model: "RedHatAI/granite-3.1-8b-instruct-FP8-dynamic"
+chat_template: true
+batch_size: "auto"
diff --git a/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic/accuracy/server.yml b/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic
+model: "RedHatAI/granite-3.1-8b-instruct-FP8-dynamic"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic/storage.yml b/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-FP8-dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16/accuracy/client.yml b/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16
+model: "RedHatAI/granite-3.1-8b-instruct-quantized.w4a16"
+chat_template: true
+batch_size: "auto"
diff --git a/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16/accuracy/server.yml b/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16
+model: "RedHatAI/granite-3.1-8b-instruct-quantized.w4a16"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16/storage.yml b/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8/accuracy/client.yml b/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8
+model: "RedHatAI/granite-3.1-8b-instruct-quantized.w8a8"
+chat_template: true
+batch_size: "auto"
diff --git a/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8/accuracy/server.yml b/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8
+model: "RedHatAI/granite-3.1-8b-instruct-quantized.w8a8"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8/storage.yml b/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/granite-3.1-8b-instruct-quantized.w8a8
+model: hf
+data: hf
diff --git a/RedHatAI/phi-4-FP8-dynamic/accuracy/client.yml b/RedHatAI/phi-4-FP8-dynamic/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/phi-4-FP8-dynamic
+model: "RedHatAI/phi-4-FP8-dynamic"
+chat_template: true
+batch_size: "auto"
diff --git a/RedHatAI/phi-4-FP8-dynamic/accuracy/server.yml b/RedHatAI/phi-4-FP8-dynamic/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/phi-4-FP8-dynamic
+model: "RedHatAI/phi-4-FP8-dynamic"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/phi-4-FP8-dynamic/storage.yml b/RedHatAI/phi-4-FP8-dynamic/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/phi-4-FP8-dynamic
+model: hf
+data: hf
diff --git a/RedHatAI/phi-4-quantized.w4a16/accuracy/client.yml b/RedHatAI/phi-4-quantized.w4a16/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/phi-4-quantized.w4a16
+model: "RedHatAI/phi-4-quantized.w4a16"
+chat_template: true
+batch_size: "auto"
diff --git a/RedHatAI/phi-4-quantized.w4a16/accuracy/server.yml b/RedHatAI/phi-4-quantized.w4a16/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/phi-4-quantized.w4a16
+model: "RedHatAI/phi-4-quantized.w4a16"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/phi-4-quantized.w4a16/storage.yml b/RedHatAI/phi-4-quantized.w4a16/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/phi-4-quantized.w4a16
+model: hf
+data: hf
diff --git a/RedHatAI/phi-4-quantized.w8a8/accuracy/client.yml b/RedHatAI/phi-4-quantized.w8a8/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/RedHatAI/phi-4-quantized.w8a8
+model: "RedHatAI/phi-4-quantized.w8a8"
+chat_template: true
+batch_size: "auto"
diff --git a/RedHatAI/phi-4-quantized.w8a8/accuracy/server.yml b/RedHatAI/phi-4-quantized.w8a8/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/RedHatAI/phi-4-quantized.w8a8
+model: "RedHatAI/phi-4-quantized.w8a8"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/RedHatAI/phi-4-quantized.w8a8/storage.yml b/RedHatAI/phi-4-quantized.w8a8/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/RedHatAI/phi-4-quantized.w8a8
+model: hf
+data: hf
diff --git a/ibm-granite/granite-3.1-8b-instruct/accuracy/client.yml b/ibm-granite/granite-3.1-8b-instruct/accuracy/client.yml
@@ -0,0 +1,4 @@
+# llm-eval-test configs for https://huggingface.co/ibm-granite/granite-3.1-8b-instruct
+model: "ibm-granite/granite-3.1-8b-instruct"
+chat_template: true
+fewshot_as_multiturn: true
diff --git a/ibm-granite/granite-3.1-8b-instruct/accuracy/server.yml b/ibm-granite/granite-3.1-8b-instruct/accuracy/server.yml
@@ -0,0 +1,6 @@
+# server configs for https://huggingface.co/ibm-granite/granite-3.1-8b-instruct
+model: "ibm-granite/granite-3.1-8b-instruct"
+trust-remote-code: true
+enable-chunked-prefill: true
+tensor-parallel-size: 1
+max-model-len: 4096
diff --git a/ibm-granite/granite-3.1-8b-instruct/storage.yml b/ibm-granite/granite-3.1-8b-instruct/storage.yml
@@ -0,0 +1,3 @@
+# storage configs for https://huggingface.co/ibm-granite/granite-3.1-8b-instruct
+model: hf
+data: hf
diff --git a/meta-llama/Llama-3.1-8B-Instruct/accuracy/client.yml b/meta-llama/Llama-3.1-8B-Instruct/accuracy/client.yml
diff --git a/meta-llama/Llama-3.1-8B-Instruct/accuracy/server.yml b/meta-llama/Llama-3.1-8B-Instruct/accuracy/server.yml
diff --git a/meta-llama/Llama-3.1-8B-Instruct/storage.yml b/meta-llama/Llama-3.1-8B-Instruct/storage.yml
diff --git a/meta-llama/Llama-3.3-70B-Instruct/accuracy/client.yml b/meta-llama/Llama-3.3-70B-Instruct/accuracy/client.yml
diff --git a/meta-llama/Llama-3.3-70B-Instruct/accuracy/server.yml b/meta-llama/Llama-3.3-70B-Instruct/accuracy/server.yml
diff --git a/meta-llama/Llama-3.3-70B-Instruct/storage.yml b/meta-llama/Llama-3.3-70B-Instruct/storage.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/accuracy/client.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/accuracy/client.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/accuracy/server.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/accuracy/server.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/accuracy/tasks.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/accuracy/tasks.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/storage.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8/storage.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct/accuracy/client.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct/accuracy/client.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct/accuracy/server.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct/accuracy/server.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct/accuracy/tasks.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct/accuracy/tasks.yml
diff --git a/meta-llama/Llama-4-Maverick-17B-128E-Instruct/storage.yml b/meta-llama/Llama-4-Maverick-17B-128E-Instruct/storage.yml
diff --git a/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/client.yml b/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/client.yml
diff --git a/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/server.yml b/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/server.yml
diff --git a/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/tasks.yml b/meta-llama/Llama-4-Scout-17B-16E-Instruct/accuracy/tasks.yml
diff --git a/meta-llama/Llama-4-Scout-17B-16E-Instruct/storage.yml b/meta-llama/Llama-4-Scout-17B-16E-Instruct/storage.yml
diff --git a/microsoft/phi-4/accuracy/client.yml b/microsoft/phi-4/accuracy/client.yml
diff --git a/microsoft/phi-4/accuracy/server.yml b/microsoft/phi-4/accuracy/server.yml
diff --git a/microsoft/phi-4/storage.yml b/microsoft/phi-4/storage.yml
diff --git a/mistralai/Mistral-Small-24B-Instruct-2501/accuracy/client.yml b/mistralai/Mistral-Small-24B-Instruct-2501/accuracy/client.yml
diff --git a/mistralai/Mistral-Small-24B-Instruct-2501/accuracy/server.yml b/mistralai/Mistral-Small-24B-Instruct-2501/accuracy/server.yml
diff --git a/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/client.yml b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/client.yml
diff --git a/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/server.yml b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/server.yml
diff --git a/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/tasks.yml b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/accuracy/tasks.yml
diff --git a/mistralai/Mistral-Small-3.1-24B-Instruct-2503/storage.yml b/mistralai/Mistral-Small-3.1-24B-Instruct-2503/storage.yml
diff --git a/mistralai/Mixtral-8x7B-Instruct-v0.1/accuracy/client.yml b/mistralai/Mixtral-8x7B-Instruct-v0.1/accuracy/client.yml
diff --git a/mistralai/Mixtral-8x7B-Instruct-v0.1/accuracy/server.yml b/mistralai/Mixtral-8x7B-Instruct-v0.1/accuracy/server.yml
diff --git a/mistralai/Mixtral-8x7B-Instruct-v0.1/storage.yml b/mistralai/Mixtral-8x7B-Instruct-v0.1/storage.yml

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# llm-eval-test configs for https://huggingface.co/Qwen/Qwen2.5-7B-Instruct`
	`2`	`+model: "Qwen/Qwen2.5-7B-Instruct"`
	`3`	`+chat_template: true`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# storage configs for https://huggingface.co/Qwen/Qwen2.5-7B-Instruct`
	`2`	`+model: hf`
	`3`	`+data: hf`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# storage configs for https://huggingface.co/RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic`
	`2`	`+model: hf`
	`3`	`+data: hf`