examples/llm_finetune/qwen (1 file changed, +12 -0)

+# ------------------------------------------------------------------------------------------------
+# Single-GPU QLoRA Example for Qwen1.5-MoE-A2.7B
+# ------------------------------------------------------------------------------------------------
+# This config demonstrates MoE LoRA training on a single GPU using 4-bit quantization (QLoRA).
+#
+# Key differences from multi-GPU DeepEP configs:
+# - Uses 4-bit quantization to fit in single GPU memory
+# - No Expert Parallelism (ep_size defaults to 1)
+# - No DeepEP backend (requires multi-GPU)
+# - Smaller batch sizes appropriate for single GPU
+# ------------------------------------------------------------------------------------------------
+
 quantization:
   load_in_4bit: true
   bnb_4bit_compute_dtype: bfloat16