# Standard
from typing import Dict

-# Third Party
try:
+    # Third Party
    from mamba_ssm.modules.mamba2_cp import Mamba2CP
-except ImportError:
-    raise ValueError("custom mamba_ssm package installation is needed"
-        "install from https://github.com/garrett361/mamba/tree/mamba-cp"
-    )
+except ImportError as exc:
+    raise ValueError(
+        "custom mamba_ssm package installation is needed; "
+        "install from https://github.com/garrett361/mamba/tree/mamba-cp"
+    ) from exc
+# Third Party
from accelerate.logging import get_logger

# pylint: disable=import-error
from torch.distributed._tensor.device_mesh import init_device_mesh
-from tqdm import tqdm
-from transformers.modeling_utils import is_fsdp_enabled
-import torch

# to avoid rechunking/sharding of the buffers
# this is not ideal, but it is done to make
# self attention cp compatible with mamba cp
from torch.distributed.tensor.experimental._attention import _cp_options
+from tqdm import tqdm
+from transformers.modeling_utils import is_fsdp_enabled
+import torch
+
_cp_options.enable_load_balance = False

logger = get_logger(__name__)
key_cp = "cp"
key_rep = "dp_shard"

-# extract ssm config from hf config to be used
+
+# extract ssm config from hf config to be used
# while swapping the mamba modules
def get_ssmconfig_from_hfconfig(hf_config) -> Dict:
    config_ssm = {}
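
For illustration only, not part of this patch: a minimal sketch of how the key_rep/key_cp dimension names and init_device_mesh imported above might be combined into a 2D mesh. The build_cp_mesh helper, the "cuda" device type, and the (rep_size, cp_degree) layout are assumptions.

def build_cp_mesh(world_size: int, cp_degree: int):
    # hypothetical helper: split the world into a replicate/shard dim and a cp dim
    rep_size = world_size // cp_degree
    return init_device_mesh(
        "cuda",
        (rep_size, cp_degree),
        mesh_dim_names=(key_rep, key_cp),  # ("dp_shard", "cp")
    )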
@@ -75,6 +79,7 @@ def forward(
        inference_params=None,
    )

+
# patches each mamba module with mamba cp module
# mamba cp module's weights are exactly the same as the hf mamba module
# so we reuse the state dict and it does not need special handling
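
A minimal sketch of the swap idea described in the comments above, not the actual patch_mamba_layers_with_cp_head body. The model.model.layers / layer.mamba attribute names and the Mamba2CP(**ssm_cfg, cp_mesh=...) constructor call are assumptions for illustration.

def swap_mamba_modules_sketch(model, hf_config, cp_mesh):
    # hypothetical walk over decoder layers; attribute names are illustrative
    ssm_cfg = get_ssmconfig_from_hfconfig(hf_config)
    for layer in model.model.layers:
        hf_mamba = layer.mamba
        cp_mamba = Mamba2CP(**ssm_cfg, cp_mesh=cp_mesh)
        # weights match the hf module exactly, so the state dict loads directly
        cp_mamba.load_state_dict(hf_mamba.state_dict())
        layer.mamba = cp_mamba
    return model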
@@ -94,7 +99,7 @@ def patch_mamba_layers_with_cp_head(
    if is_fsdp_enabled():
        device = torch.device("cpu")
    rep_size = world_size // cp_degree
-
+
    # auto infer ddp and cp ranks
    # does not work on other combinations of parallelisms
    logger.warning(
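
A sketch of the rank bookkeeping hinted at by the comments above, assuming cp is the fastest-varying mesh dimension (matching a ("dp_shard", "cp") layout); the helper name and the direct torch.distributed rank queries are assumptions, not code from this patch.

import torch.distributed as dist

def infer_ranks_sketch(cp_degree: int):
    # hypothetical helper: derive dp/cp coordinates from the flat global rank
    global_rank = dist.get_rank()
    world_size = dist.get_world_size()
    rep_size = world_size // cp_degree   # number of dp_shard groups
    cp_rank = global_rank % cp_degree    # position within the cp group
    rep_rank = global_rank // cp_degree  # which dp_shard group this rank sits in
    return rep_rank, cp_rank, rep_size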