Commit 6df1954 (parent: 81f8d06)

Update the comment

Signed-off-by: Jingyu Xin <[email protected]>

1 file changed, 2 insertions(+), 2 deletions(-)

modelopt/torch/peft/lora/plugins/megatron.py (2 additions, 2 deletions)

@@ -157,7 +157,7 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
         """Sharding along axis 0 for ColumnParallelLinear, bias not sharded.
 
         For ColumnParallelLinear:
-        - lora_a weight: sharded at dim 0
+        (lora_a is a regular nn.Linear and is not sharded)
         - lora_b weight: sharded at dim 0
         """
         sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)
@@ -233,7 +233,7 @@ def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
 
         For RowParallelLinear:
         - lora_a weight: sharded at dim 1 (RowParallelLinear)
-        - lora_b weight: sharded at dim 0 (ColumnParallelLinear)
+        (lora_b is a regular nn.Linear and is not sharded)
         """
         sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)
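For context, the sharding scheme these corrected docstrings describe can be illustrated with a minimal, self-contained sketch. This is plain PyTorch, not modelopt's or Megatron-LM's actual API; the `shard` helper, the class pairing, and all dimension values are hypothetical, chosen only to show which LoRA weight stays replicated and which one is split across tensor-parallel ranks.

```python
# Hypothetical illustration of the sharding described in the docstrings above.
# None of these names come from modelopt; the dims and world size are made up.
import torch
import torch.nn as nn


def shard(weight: torch.Tensor, dim: int, rank: int, world: int) -> torch.Tensor:
    """Return this rank's slice of `weight` along `dim` (tensor parallelism)."""
    return weight.chunk(world, dim=dim)[rank]


in_f, out_f, r = 1024, 4096, 16  # base layer dims and LoRA rank
rank, world = 0, 4               # tensor-parallel rank / world size

# ColumnParallelLinear + LoRA:
#   lora_a is a regular nn.Linear -> replicated on every rank (not sharded)
#   lora_b follows the base layer -> weight sharded at dim 0
col_lora_a = nn.Linear(in_f, r, bias=False)  # full copy on every rank
col_lora_b = nn.Linear(r, out_f, bias=False)
col_lora_b_local = shard(col_lora_b.weight, dim=0, rank=rank, world=world)

# RowParallelLinear + LoRA:
#   lora_a follows the base layer -> weight sharded at dim 1
#   lora_b is a regular nn.Linear -> replicated on every rank (not sharded)
row_lora_a = nn.Linear(in_f, r, bias=False)
row_lora_a_local = shard(row_lora_a.weight, dim=1, rank=rank, world=world)
row_lora_b = nn.Linear(r, out_f, bias=False)  # full copy on every rank

print(col_lora_b_local.shape)  # torch.Size([1024, 16]) -- out_f/world rows
print(row_lora_a_local.shape)  # torch.Size([16, 256])  -- in_f/world cols
```

The asymmetry is the point of the commit: only the LoRA matrix that sits on the parallel dimension of the base layer needs a sharded entry in `sharded_state_dict`; the other adapter matrix is an ordinary `nn.Linear` and is saved whole.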
