
Commit 6c8c73e

fix(validation): add validation for lora target linear with quantize experts (#3461)
* fix: add validation for lora target linear with quantize experts
* chore: fix lint
* chore: comment
* fix: missing link on readme
1 parent: a260d33

File tree

4 files changed: +21 −1 lines changed


README.md

Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@
 ## 🎉 Latest Updates

 - 2026/03:
-  - New model support has been added in Axolotl for Qwen3.5, Qwen3.5 MoE, [GLM-4.7-Flash](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/glm47-flash), [GLM-4.6V](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/glm46v), and [GLM-4.5-Air](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/glm45).
+  - New model support has been added in Axolotl for [Qwen3.5, Qwen3.5 MoE](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/qwen3.5), [GLM-4.7-Flash](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/glm47-flash), [GLM-4.6V](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/glm46v), and [GLM-4.5-Air](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/glm45).
   - [MoE expert quantization](https://docs.axolotl.ai/docs/expert_quantization.html) support (via `quantize_moe_experts: true`) greatly reduces VRAM when training MoE models (FSDP2 compat).
 - 2026/02:
   - [ScatterMoE LoRA](https://github.com/axolotl-ai-cloud/axolotl/pull/3410) support. LoRA fine-tuning directly on MoE expert weights using custom Triton kernels.

docs/expert_quantization.qmd

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ lora_target_parameters:

 ## Limitations

+- `lora_target_linear` is not compatible with `quantize_moe_experts`. See [Expert LoRA targeting](#expert-lora-targeting) instead.
 - `cpu_ram_efficient_loading` hangs / takes long time with FSDP2 + QLoRA.
 - Total model parameter count may display incorrectly (trainable param count is correct).
 - FSDP LoRA (8-bit) may have a large initial VRAM spike at the first 1-2 steps, which then drops. QLoRA does not exhibit this.
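Given the new limitation, a config that enables expert quantization must name expert parameters explicitly via `lora_target_parameters` rather than use `lora_target_linear`. A minimal sketch of such a config — the parameter paths below are illustrative assumptions, not taken from the docs; consult the expert quantization guide for the correct paths for your model:

```yaml
# Sketch: MoE expert quantization with explicit LoRA parameter targeting.
adapter: qlora
load_in_4bit: true
quantize_moe_experts: true

# lora_target_linear: true   # now rejected by validation when quantizing experts
lora_target_parameters:       # target expert weights directly instead
  - mlp.experts.gate_up_proj  # illustrative path, varies by architecture
  - mlp.experts.down_proj     # illustrative path, varies by architecture
```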

src/axolotl/utils/schemas/config.py

Lines changed: 5 additions & 0 deletions

@@ -1302,6 +1302,11 @@ def check_multigpu_lora_kernels(cls, data):
     @classmethod
     def check_quantize_moe_experts(cls, data):
         if data.get("quantize_moe_experts"):
+            if data.get("lora_target_linear"):
+                raise ValueError(
+                    "lora_target_linear is not compatible with quantize_moe_experts. "
+                    "Use lora_target_parameters to target expert weights instead."
+                )
             if data.get("adapter") not in ("lora", "qlora"):
                 raise ValueError("quantize_moe_experts requires adapter: lora or qlora")
             if not (data.get("load_in_4bit") or data.get("load_in_8bit")):
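The validator's logic can be sketched as a standalone function. This is a minimal reproduction of the check shown in the diff, not the actual Axolotl schema code; the function name and the final error message are assumptions (the diff is truncated after the `load_in_4bit`/`load_in_8bit` condition):

```python
def check_quantize_moe_experts_cfg(cfg: dict) -> dict:
    """Sketch of the new validation: reject incompatible LoRA settings
    when quantize_moe_experts is enabled."""
    if cfg.get("quantize_moe_experts"):
        # New check from this commit: lora_target_linear would try to wrap
        # the (quantized) expert linears, which is unsupported.
        if cfg.get("lora_target_linear"):
            raise ValueError(
                "lora_target_linear is not compatible with quantize_moe_experts. "
                "Use lora_target_parameters to target expert weights instead."
            )
        # Pre-existing checks retained from the surrounding code.
        if cfg.get("adapter") not in ("lora", "qlora"):
            raise ValueError("quantize_moe_experts requires adapter: lora or qlora")
        if not (cfg.get("load_in_4bit") or cfg.get("load_in_8bit")):
            # Message assumed; the diff ends at this condition.
            raise ValueError("quantize_moe_experts requires 4-bit or 8-bit loading")
    return cfg
```

A valid config passes through unchanged, while enabling `lora_target_linear` alongside `quantize_moe_experts` raises immediately, before any model loading.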

tests/utils/schemas/validation/test_moe_quant.py

Lines changed: 14 additions & 0 deletions

@@ -79,6 +79,20 @@ def test_false_skips_validation(self, min_base_cfg, gpu_caps, env_caps):
         result = validate_config(cfg, capabilities=gpu_caps, env_capabilities=env_caps)
         assert result["quantize_moe_experts"] is False

+    def test_rejects_lora_target_linear(self, min_base_cfg, gpu_caps, env_caps):
+        """quantize_moe_experts with lora_target_linear should fail."""
+        cfg = (
+            DictDefault(
+                quantize_moe_experts=True,
+                adapter="qlora",
+                load_in_4bit=True,
+                lora_target_linear=True,
+            )
+            | min_base_cfg
+        )
+        with pytest.raises(ValueError, match="lora_target_linear is not compatible"):
+            validate_config(cfg, capabilities=gpu_caps, env_capabilities=env_caps)
+
     def test_default_is_false(self, min_base_cfg, gpu_caps, env_caps):
         """quantize_moe_experts should default to false."""
         cfg = DictDefault({}) | min_base_cfg
