@@ -1225,29 +1225,35 @@ def load_weights(
             yield param_name
 
     def get_expert_weights(self) -> Iterable[torch.Tensor]:
-        def maybe_make_contiguous(name: str, p: torch.nn.Parameter) -> torch.nn.Parameter:
+        def _maybe_make_contiguous(
+            name: str, p: torch.nn.Parameter
+        ) -> torch.nn.Parameter:
             """
-            Expert weight-scales are transposed and are represented
-            in column-major. This function transposes the tensor back
-            so the tensor is contiguous().
+            In some cases, the last 2 dimensions (the non-expert dimensions)
+            of the weight scale tensor are transposed. This function transposes
+            the tensor back so the tensor is contiguous().
+            Example: a scale tensor
+            `x` of shape (E, 32, 16) and stride (512, 1, 32) is transposed to
+            `xt` of shape (E, 16, 32) and stride (512, 32, 1).
+            Note that we specifically use torch.transpose() so `xt` refers
+            to the same underlying memory. Because `x` and `xt` point to
+            the same storage, this transformation is safe in the
+            context of EPLB: it is the same memory and just the view
+            is different.
+            Note: this function handles the "weight_scale" tensors specifically.
+            It could, however, be generalized to handle similar tensors.
             """
-            if p.is_contiguous():
-                return p
-            if "weight_scale" not in name:
+            # Check if the last 2 dimensions are transposed.
+            is_transposed = p.ndim == 3 and p.stride(1) == 1 and p.stride(2) != 1
+            if p.is_contiguous() or not is_transposed or "weight_scale" not in name:
                 # do nothing.
                 return p
             assert p.ndim == 3
-            # Check if the tensor is tranposed
-            is_colmajor = p.size(1) == 1 and p.size(2) != 1
-            p = torch.transpose(p, 1, 2)
-            assert p.is_contiguous()
+            p.data = torch.transpose(p.data, 1, 2)
             return p
 
         weights = list(self.named_parameters())
-        weights = [(name, maybe_make_contiguous(name, p)) for name, p in weights]
-
-        #for name, weight in weights:
-        #    print(f"{name} is_contiguous() ? {weight.is_contiguous()}")
+        weights = [(name, _maybe_make_contiguous(name, p)) for name, p in weights]
 
         assert all(
             weight.is_contiguous()
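For illustration, here is a small standalone sketch (not part of the diff; the names `base`, `scale`, and `scale_t` and the shapes are made up) that reproduces the docstring's example: transposing the last two dimensions back with `torch.transpose()` yields a contiguous tensor that still shares storage with the original, which is why EPLB's in-place expert rearrangement sees the same memory.

```python
import torch

# Hypothetical example; only meant to mirror the shapes/strides from the
# docstring above, not the actual vLLM parameters.
E, M, N = 4, 16, 32

base = torch.randn(E, M, N)   # shape (4, 16, 32), stride (512, 32, 1), contiguous
scale = base.transpose(1, 2)  # shape (4, 32, 16), stride (512, 1, 32), NOT contiguous

assert not scale.is_contiguous()
assert scale.stride(1) == 1 and scale.stride(2) != 1  # the is_transposed check

# Transposing the last two dims back gives a contiguous view of the SAME storage.
scale_t = torch.transpose(scale, 1, 2)
assert scale_t.is_contiguous()
assert scale_t.data_ptr() == scale.data_ptr()  # no copy was made

print(scale_t.shape, scale_t.stride())  # torch.Size([4, 16, 32]) (512, 32, 1)
```

Note also that the new code assigns the transposed view to `p.data` rather than rebinding `p`, so the registered `torch.nn.Parameter` object keeps its identity while exposing the contiguous view.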