Commit 832bce7
[NVFp4][MoE Calibration] Fix MoE Calibration Context (#1864)
SUMMARY:
- Fixes a bug where, despite being run within the MoE calibration context, some experts were never calibrated and their activation scales remained unchanged.
- Updates the Qwen3 MoE NVFP4 example to use only 20 calibration samples instead of 200.
1 parent 16de22f commit 832bce7

File tree

2 files changed: 2 additions & 3 deletions

examples/quantization_w4a4_fp4/qwen_30b_a3b.py

Lines changed: 1 addition & 2 deletions
@@ -16,7 +16,7 @@
 DATASET_SPLIT = "train_sft"

 # Select number of samples
-NUM_CALIBRATION_SAMPLES = 200
+NUM_CALIBRATION_SAMPLES = 20
 MAX_SEQUENCE_LENGTH = 2048

 # Load dataset and preprocess.
@@ -84,7 +84,6 @@ def tokenize(sample):
 print(tokenizer.decode(output[0]))
 print("==========================================\n\n")

-
 # Save to disk in compressed-tensors format.
 SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1] + "-NVFP4"
 model.save_pretrained(SAVE_DIR, save_compressed=True)

src/llmcompressor/modeling/prepare.py

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ def update_qwen3_moe(model, stack, calibrate_all_experts):
 def moe_calibration_context(
     model: PreTrainedModel,
     stack,
-    calibrate_all_experts: bool = False,
+    calibrate_all_experts: bool = True,
 ):
     # Temporarily updates the MoE modules within the context
     # Once the context exits, parameter updates persist
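With the default flipped to True, entering the context now swaps every expert into its calibration form, so all experts see the calibration batches. A minimal usage sketch follows, assuming stack is a contextlib.ExitStack (suggested by the parameter name) and using an illustrative Qwen3 MoE checkpoint name; the exact call site inside llm-compressor may differ:

from contextlib import ExitStack

from transformers import AutoModelForCausalLM

from llmcompressor.modeling.prepare import moe_calibration_context

# Checkpoint name is illustrative; the example script targets a Qwen3 30B-A3B MoE model.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-30B-A3B")

with ExitStack() as stack:
    # With the new default (calibrate_all_experts=True), the MoE modules are
    # temporarily replaced so every expert receives the calibration batches
    # and gets activation scales, not just the experts the router selects.
    moe_calibration_context(model, stack)
    # ... run calibration forward passes over the 20 samples here ...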

0 commit comments