initiate repo

dhuangnm · dhuangnm · commit 993568a3a7f1 · 2025-03-31T22:56:43.000-04:00
diff --git a/README.md b/README.md
@@ -1 +1,6 @@
-# model-validation-configs
+# model-validation-configs
+
+This repository contains configurations for model validation.
+
+The `lm-eval` folder contains one YAML file per model; each file configures the information needed to validate that model through the
+[lm-evaluation-harness](https://github.com/huggingface/lm-evaluation-harness).
diff --git a/lm-eval/Meta-Llama-3.1-8B-Instruct.yaml b/lm-eval/Meta-Llama-3.1-8B-Instruct.yaml
@@ -0,0 +1,47 @@
+# Validation config for https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
+server:
+  model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  trust_remote_code: true
+  enable_chunked_prefill: true
+  tensor_parallel_size: null  # no value set; presumably the consumer picks a default - TODO confirm
+  max_model_len: 4096
+
+storage:  # "hf" presumably refers to the Hugging Face Hub - TODO confirm
+  model: hf
+  data: hf
+
+lm-eval:
+  model: "vllm"
+  model_args:
+    pretrained: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  num_fewshot: null  # no value set; presumably falls back to per-task defaults - TODO confirm
+  apply_chat_template: true
+  fewshot_as_multiturn: true
+  add_bos_token: false
+  tasks:  # per-task metrics; "value" is presumably the reference score to validate against - TODO confirm
+  - name: "leaderboard_bbh"
+    metrics:
+    - name: "acc_norm,none"
+      value: 0.50946
+  - name: "leaderboard_gpqa"
+    metrics:
+    - name: "acc_norm,none"
+      value: 0.29698
+  - name: "leaderboard_ifeval"
+    metrics:
+    - name: "inst_level_loose_acc,none"
+      value: 0.85851
+    - name: "inst_level_strict_acc,none"
+      value: 0.82374
+    - name: "prompt_level_loose_acc,none"
+      value: 0.79667
+    - name: "prompt_level_strict_acc,none"
+      value: 0.74861
+  - name: "leaderboard_math_hard"
+    metrics:
+    - name: "exact_match,none"
+      value: 0.19864
+  - name: "leaderboard_musr"
+    metrics:
+    - name: "acc_norm,none"
+      value: 0.38359