yiliu30
diff --git a/tests/e2e/vLLM/recipes/actorder/recipe_awq_nvfp4.yaml b/tests/e2e/vLLM/recipes/NVFP4/recipe_awq_nvfp4.yaml
rename from tests/e2e/vLLM/recipes/actorder/recipe_awq_nvfp4.yaml
rename to tests/e2e/vLLM/recipes/NVFP4/recipe_awq_nvfp4.yaml
diff --git a/tests/e2e/vLLM/recipes/actorder/recipe_awq_nvfp4a16.yaml b/tests/e2e/vLLM/recipes/NVFP4/recipe_awq_nvfp4a16.yaml
rename from tests/e2e/vLLM/recipes/actorder/recipe_awq_nvfp4a16.yaml
rename to tests/e2e/vLLM/recipes/NVFP4/recipe_awq_nvfp4a16.yaml
diff --git a/tests/lmeval/configs/awq_nvfp4.yaml b/tests/lmeval/skipped_configs/awq_nvfp4.yaml
rename from tests/lmeval/configs/awq_nvfp4.yaml
rename to tests/lmeval/skipped_configs/awq_nvfp4.yaml
Lines changed: 2 additions & 1 deletion
diff --git a/tests/lmeval/configs/awq_nvfp4a16.yaml b/tests/lmeval/skipped_configs/awq_nvfp4a16.yaml
rename from tests/lmeval/configs/awq_nvfp4a16.yaml
rename to tests/lmeval/skipped_configs/awq_nvfp4a16.yaml
Lines changed: 2 additions & 1 deletion
@@ -4,10 +4,11 @@ scheme: NVFP4
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 num_calibration_samples: 20
-recipe: tests/e2e/vLLM/recipes/actorder/recipe_awq_nvfp4.yaml
+recipe: tests/e2e/vLLM/recipes/NVFP4/recipe_awq_nvfp4.yaml
 lmeval:
   # NVFP4 (4-bit weights + 4-bit activations) has lower recovery than FP8/INT8
   # Observed: strict-match ~92.81%, flexible-extract ~89.59%
+  # TODO: check if recovery is consistent - 0.65 is too low for 0.94 recovery
   recovery_threshold:
     exact_match,strict-match: 0.92
     exact_match,flexible-extract: 0.88
 
@@ -4,10 +4,11 @@ scheme: NVFP4
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft
 num_calibration_samples: 20
-recipe: tests/e2e/vLLM/recipes/actorder/recipe_awq_nvfp4a16.yaml
+recipe: tests/e2e/vLLM/recipes/NVFP4/recipe_awq_nvfp4a16.yaml
 lmeval:
   # NVFP4 (4-bit weights + 4-bit activations) has lower recovery than FP8/INT8
   # Observed: strict-match ~92.81%, flexible-extract ~89.59%
+  # TODO: check if recovery is consistent - 0.65 is too low for 0.94 recovery
   recovery_threshold:
     exact_match,strict-match: 0.95
     exact_match,flexible-extract: 0.94