
Commit b6f831f

[Minor] Pruning doc update + bring minitron import to mtp.* instead of mtp.plugins.*
Signed-off-by: Keval Morabia <[email protected]>
1 parent: 6dffcd0

3 files changed: +21, -2 lines

examples/megatron-lm/README.md

Lines changed: 9 additions & 1 deletion
@@ -110,6 +110,8 @@ Coming soon ...
 
 ### ⭐ Pruning
 
+Check out the pruning [getting started section](../pruning/README.md#getting-started) and [guidelines](../pruning/README.md#pruning-guidelines) in the pruning README for configuring pruning parameters.
+
 Pruning is supported for GPT and Mamba models in Pipeline Parallel mode. Available pruning options are:
 
 - `TARGET_FFN_HIDDEN_SIZE`
@@ -121,14 +123,20 @@ Pruning is supported for GPT and Mamba models in Pipeline Parallel mode. Availab
 - `TARGET_NUM_LAYERS`
 - `LAYERS_TO_DROP` (comma separated, 1-indexed list of layer numbers to directly drop)
 
+Example for depth pruning Qwen3-8B from 36 to 24 layers:
+
 ```sh
 PP=1 \
 TARGET_NUM_LAYERS=24 \
 HF_MODEL_CKPT=<pretrained_model_name_or_path> \
-MLM_MODEL_SAVE=/tmp/Qwen3-8B-DPruned \
+MLM_MODEL_SAVE=Qwen3-8B-Pruned \
 bash megatron-lm/examples/post_training/modelopt/prune.sh qwen/Qwen3-8B
 ```
 
+> [!TIP]
+> If the number of layers in the model is not divisible by the pipeline parallel size (PP), you can configure uneven
+> PP by setting `MLM_EXTRA_ARGS="--decoder-first-pipeline-num-layers <X> --decoder-last-pipeline-num-layers <Y>"`.
+
 ## Learn More About Configuration
 
 For simplicity, we use `shell` scripts and variables as arguments. Each script has at least 1 positional

modelopt/torch/prune/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -21,6 +21,10 @@
 
 # nas is a required - so let's check if it's available
 import modelopt.torch.nas
+from modelopt.torch.utils import import_plugin
 
 from . import fastnas, gradnas, plugins
 from .pruning import *
+
+with import_plugin("mcore_minitron", verbose=False):
+    from .plugins import mcore_minitron
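
With this change, the Minitron pruning plugin module is bound directly on `modelopt.torch.prune` (guarded by `import_plugin`, so it only loads when Megatron-Core is available). A minimal sketch of the resulting import paths, assuming the Megatron-Core dependency is installed so the guarded import succeeds; the previous `mtp.plugins.*` path referenced in the commit message is shown for comparison:

```python
# Minimal sketch of the access pattern enabled by this change; assumes the
# Megatron-Core dependency is installed so the guarded plugin import succeeds.
import modelopt.torch.prune as mtp

# New: the plugin module is reachable directly on the mtp.* namespace.
new_style = mtp.mcore_minitron.MCoreMinitronSearcher

# Previous access path referenced in the commit message (mtp.plugins.*).
old_style = mtp.plugins.mcore_minitron.MCoreMinitronSearcher

assert new_style is old_style  # both names refer to the same class
```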

modelopt/torch/prune/plugins/mcore_minitron.py

Lines changed: 8 additions & 1 deletion
@@ -37,6 +37,7 @@
     HAS_MAMBA,
     _DynamicMCoreLanguageModel,
     SUPPORTED_MODELS,
+    drop_mcore_language_model_layers,
 )
 # isort: on
 
@@ -70,7 +71,13 @@
     "num_layers",
 }
 
-__all__ = ["MCoreMinitronConfig", "MCoreMinitronModeDescriptor", "MCoreMinitronSearcher"]
+__all__ = [
+    "SUPPORTED_HPARAMS",
+    "MCoreMinitronConfig",
+    "MCoreMinitronModeDescriptor",
+    "MCoreMinitronSearcher",
+    "drop_mcore_language_model_layers",
+]
 
 
 class MCoreMinitronSearcher(BaseSearcher):
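
Since `drop_mcore_language_model_layers` is now exported in `__all__` (and the module is reachable as `mtp.mcore_minitron` per the `__init__.py` change above), it can be called directly for manual depth pruning. A hedged sketch follows, assuming the function takes the model plus a 1-indexed list of layer numbers mirroring the `LAYERS_TO_DROP` option documented in the README; the exact signature is not confirmed by this diff, so check the function's docstring.

```python
# Hedged usage sketch: the (model, layers_to_drop) signature is an assumption
# based on the LAYERS_TO_DROP description in the README, not a confirmed API.
from modelopt.torch.prune.plugins.mcore_minitron import drop_mcore_language_model_layers


def drop_last_layers_example(model):
    """Directly drop the last four layers of a 36-layer model (1-indexed layer numbers)."""
    drop_mcore_language_model_layers(model, layers_to_drop=[33, 34, 35, 36])
    return model
```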
