Respond to CodeRabbit review comments

jenchen13 · jenchen13 · commit 5d46a7bad8d9 · 2025-09-05T18:12:00.000Z
Signed-off-by: Jennifer Chen <jennifchen@nvidia.com>
diff --git a/examples/nemo_run/common/in_memory_mmlu.py b/examples/nemo_run/common/in_memory_mmlu.py
@@ -25,9 +25,10 @@ def parse_args():
     parser = argparse.ArgumentParser(
         description="Run MMLU evaluation with ModelOpt Megatron model. Provide either --nemo_ckpt or --ckpt_dir"
     )
-    parser.add_argument("--nemo_ckpt", type=str, required=False, help="Path to NeMo checkpoint.")
-    parser.add_argument(
-        "--ckpt_dir",
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("--nemo_ckpt", type=str, required=False, help="Path to NeMo checkpoint.")
+    group.add_argument(
+        "--finetuned_ckpt_dir",
         required=False,
         type=str,
         help="Checkpoint directory of 1 or more finetuned models",
@@ -43,7 +44,6 @@ def parse_args():
 
 if __name__ == "__main__":
     args = parse_args()
-    assert args.nemo_ckpt or args.ckpt_dir, "Provide one of either --nemo_ckpt or --ckpt_dir."
     ckpt_path = args.nemo_ckpt
     if args.ckpt_dir:
         ckpt_path = _get_most_recent_ckpt(args.ckpt_dir)
diff --git a/examples/nemo_run/qat/ADVANCED.md b/examples/nemo_run/qat/ADVANCED.md
@@ -1,6 +1,6 @@
 # NeMo QAT/QAD Flow: Advanced Topics
 
-If you need to run QAT/QAD on a Slurm cluster (for example to use more than 1 node)
+If you need to run QAT/QAD on a Slurm cluster (for example to use more than 1 node), this guide covers how to configure and launch on Slurm.
 
 To run the example on slurm, edit the `SLURM_CONFIG` at the bottom of `nemo_qat_flow.py` with the appropriate credentials, container, cluster name (host), and container mounts. Make sure you are mounting the NeMo and Megatron-LM repositories above in the Slurm cluster and that you've checked out the correct commits.
 
diff --git a/examples/nemo_run/qat/README.md b/examples/nemo_run/qat/README.md
@@ -12,18 +12,20 @@
 
 This directory contains an end-to-end QAT Simplified Flow example using NeMo for model training. It supports both QAT with cross-entropy loss and QAD (quantization-aware distillation) with knowledge-distillation loss between the BF16 teacher and quantized student models.
 
-After PTQ (post-training quantization), the quantized model may
+After PTQ (post-training quantization), the quantized model may show some accuracy degradation on tasks like MMLU; the QAT/QAD stages aim to recover that loss.
 
 ## Flow Stages
 
-Currently the Simplified Flow runs the following steps in order:
+The Simplified Flow runs the following steps in order:
 
-1. Process Nvidia/OpenScience data (if `--data-path` is not specified)
-1. Import NeMo BF16 model checkpoint and evaluate 5% of MMLU on BF16 checkpoint
-1. PTQ the model and evaluate 5% of MMLU on PTQ Checkpoint
-1. SFT (finetune) the model
-1. Evaluate 5% of MMLU on the SFT checkpoint
-1. Export model to Unified checkpoint (HuggingFace) format in lower precision
+1. 00_openscience_data — Process NVIDIA/OpenScience data (skipped if `--data-path` is given)
+1. 01_import_model — Import NeMo BF16 model checkpoint
+1. 02_mmlu_bf16 — Evaluate 5% MMLU on BF16 checkpoint
+1. 03_ptq — Apply PTQ
+1. 04_mmlu_ptq — Evaluate 5% MMLU on PTQ checkpoint
+1. 05_train — SFT/QAT (and optional QAD)
+1. 06_mmlu_sft — Evaluate 5% MMLU on SFT/QAT checkpoint
+1. 07_export_hf — Export to Hugging Face (Unified) format
 
 ```mermaid
 graph TD;
diff --git a/examples/nemo_run/qat/nemo_qat_flow.py b/examples/nemo_run/qat/nemo_qat_flow.py
@@ -138,14 +138,8 @@ def get_args():
         "--enable_kv_cache",
         help="Enables KV-cache quantization",
         action="store_true",
+        default=False
     )
-    parser.add_argument(
-        "--disable_kv_cache",
-        dest="enable_kv_cache",
-        action="store_false",
-    )
-
-    parser.set_defaults(enable_kv_cache=None)
     return parser.parse_args()
 
 
@@ -265,7 +259,7 @@ def main(args):
     )
     eval_sft = run.Script(
         mmlu_script_path,
-        args=["--ckpt_dir", exp_dir],
+        args=["--finetuned_ckpt_dir", exp_dir],
         entrypoint="python",
     )
 
diff --git a/modelopt/torch/export/plugins/nemo_run.py b/modelopt/torch/export/plugins/nemo_run.py
@@ -57,15 +57,17 @@ def _get_most_recent_ckpt(directory: str):
         str: Path to the most recent subdirectory.
     """
     exp_dir = Path(directory) / "default"
-    assert exp_dir.exists(), f"Experiment directory {exp_dir} does not exist"
+    if not exp_dir.exists(): 
+        raise FileNotFoundError(f"Experiment directory {exp_dir} does not exist")
 
     checkpoint_dir = exp_dir / "checkpoints"
     if checkpoint_dir.exists():
         most_recent = _get_most_recent_subdir(checkpoint_dir)
     else:
         most_recent = _get_most_recent_subdir(exp_dir)
         checkpoint_dir = most_recent / "checkpoints"
-        assert checkpoint_dir.exists(), f"Checkpoint directory {checkpoint_dir} does not exist"
+        if not checkpoint_dir.exists():
+            raise FileNotFoundError(f"Checkpoint directory {checkpoint_dir} does not exist")
         most_recent = _get_most_recent_subdir(checkpoint_dir)
 
     return str(most_recent)