
Commit b29792f

make generic

1 parent be89690

3 files changed: +26 -62 lines changed

src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py
Lines changed: 5 additions & 3 deletions

@@ -60,7 +60,7 @@ def compression_param_names(self) -> Tuple[str]:
             "weight_zero_point",
             "weight_global_scale",
         )
-
+
     def compression_param_info(
         self,
         weight_shape: torch.Size,
@@ -75,11 +75,13 @@ def compression_param_info(
         :return: dictionary mapping compressed parameter names to shape and dtype
         """
         output = {
-            "weight_packed": (torch.Size((weight_shape[0], weight_shape[1] // 2)), torch.uint8),
+            "weight_packed": (
+                torch.Size((weight_shape[0], weight_shape[1] // 2)),
+                torch.uint8,
+            ),
         }
         return output
 
-
     def compress_weight(
         self,
         weight: Tensor,
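
For context, a minimal sketch (not part of this commit) of the shape math behind the "weight_packed" entry: NVFP4 packs two 4-bit values into one uint8 byte, so the last weight dimension is halved. The helper name below is hypothetical.

import torch

def packed_weight_info(weight_shape: torch.Size):
    # Hypothetical helper mirroring the "weight_packed" entry above:
    # two FP4 values per uint8 byte halves the last dimension.
    return torch.Size((weight_shape[0], weight_shape[1] // 2)), torch.uint8

shape, dtype = packed_weight_info(torch.Size((4096, 4096)))
assert shape == torch.Size((4096, 2048)) and dtype == torch.uint8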

src/compressed_tensors/quantization/lifecycle/forward.py
Lines changed: 11 additions & 37 deletions

@@ -257,11 +257,7 @@ def _process_quantization(
         QuantizationStrategy.GROUP,
         QuantizationStrategy.TENSOR_GROUP,
     ):
-        """
-        n_dims = x.shape
-        if len(n_dims) > 2:
-            x = x.squeeze(0)
-        """
+
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
         columns = output.shape[-1]
@@ -294,25 +290,12 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        if len(x.shape) > 2:
-            x = torch.reshape(
-                x,
-                (
-                    x.shape[0],
-                    x.shape[1],
-                    ceil(x.shape[-1] / group_size),
-                    group_size,
-                ),
-            )
-        else:
-            x = torch.reshape(
-                x,
-                (
-                    x.shape[0],
-                    ceil(x.shape[-1] / group_size),
-                    group_size,
-                ),
-            )
+        # Maintain all dimensions apart from the last dim, which is divided by the group_size
+        reshaped_dims = tuple(x.shape[:-1]) + (
+            ceil(x.shape[-1] / group_size),
+            group_size,
+        )
+        x = torch.reshape(x, reshaped_dims)
 
         if do_quantize:
             output = _quantize(
@@ -335,25 +318,16 @@ def _process_quantization(
                 global_scale=global_scale,
             )
 
-        if len(x.shape) > 3:
-            output = torch.reshape(
-                output,
-                (output.shape[0], output.shape[1], output.shape[-1] * output.shape[-2]),
-            )
-        else:
-            output = torch.reshape(
-                output,
-                (output.shape[0], output.shape[-1] * output.shape[-2]),
-            )
+        original_shaped_dims = tuple(output.shape[:-2]) + (
+            output.shape[-1] * output.shape[-2],
+        )
+        output = torch.reshape(output, original_shaped_dims)
 
         output = output.to(output_dtype)
 
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
 
-        #if len(n_dims) > 2:
-        #    output = output.unsqueeze(0)
-
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(
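
The replacement logic is rank-agnostic: only the last dimension is split into (num_groups, group_size) on the way in, and the trailing two dimensions are collapsed on the way out, so 2-D weights and batched 3-D activations share one code path. A standalone sketch of that round trip, assuming the last dimension divides evenly by the group size (function names here are illustrative, not the library's):

import torch
from math import ceil

def to_groups(x: torch.Tensor, group_size: int) -> torch.Tensor:
    # Split only the last dim into (num_groups, group_size); leading dims untouched.
    dims = tuple(x.shape[:-1]) + (ceil(x.shape[-1] / group_size), group_size)
    return torch.reshape(x, dims)

def from_groups(x: torch.Tensor) -> torch.Tensor:
    # Collapse the trailing (num_groups, group_size) pair back into one dim.
    dims = tuple(x.shape[:-2]) + (x.shape[-1] * x.shape[-2],)
    return torch.reshape(x, dims)

w = torch.randn(8, 64)     # 2-D weight
a = torch.randn(2, 8, 64)  # 3-D batched activation
assert to_groups(w, 16).shape == (8, 4, 16)
assert to_groups(a, 16).shape == (2, 8, 4, 16)
assert torch.equal(from_groups(to_groups(a, 16)), a)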

src/compressed_tensors/quantization/utils/helpers.py
Lines changed: 10 additions & 22 deletions

@@ -175,35 +175,23 @@ def compute_dynamic_scales_and_zp(
         QuantizationStrategy.TENSOR_GROUP,
         QuantizationStrategy.GROUP,
     ):
-        #if len(value.shape) > 2:
-        #    value = value.squeeze(0)
+
         if len(value.shape) > 2:
             dim = {0, 1, 2}
         else:
             dim = {0, 1}
 
-        reduce_dims = tuple(idx for idx in range(len(value.shape) + 1) if idx not in dim)
+        reduce_dims = tuple(
+            idx for idx in range(len(value.shape) + 1) if idx not in dim
+        )
         keep_dims = False
 
-        if len(value.shape) > 2:
-            value = torch.reshape(
-                value,
-                (
-                    value.shape[0],
-                    value.shape[1],
-                    math.ceil(value.shape[-1] / args.group_size),
-                    args.group_size,
-                ),
-            )
-        else:
-            value = torch.reshape(
-                value,
-                (
-                    value.shape[0],
-                    math.ceil(value.shape[-1] / args.group_size),
-                    args.group_size,
-                ),
-            )
+        reshaped_dims = tuple(value.shape[:-1]) + (
+            math.ceil(value.shape[-1] / args.group_size),
+            args.group_size,
+        )
+        value = torch.reshape(value, reshaped_dims)
+
     else:
         supported_strategies = (
             QuantizationStrategy.TOKEN,
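
The same trick generalizes the dynamic-scale path: after grouping, the tensor gains one trailing dimension, and reduce_dims selects everything outside the kept leading dims so statistics are taken per group. A rough illustration of the shape behavior, using a plain abs-max in place of the library's actual observer logic (hypothetical function, assumes even divisibility):

import math
import torch

def dynamic_group_scales(value: torch.Tensor, group_size: int) -> torch.Tensor:
    # Illustrative only: one abs-max per group, for any input rank,
    # assuming group_size divides the last dimension evenly.
    kept = {0, 1, 2} if value.ndim > 2 else {0, 1}
    reduce_dims = tuple(i for i in range(value.ndim + 1) if i not in kept)
    dims = tuple(value.shape[:-1]) + (
        math.ceil(value.shape[-1] / group_size),
        group_size,
    )
    grouped = torch.reshape(value, dims)
    return grouped.abs().amax(dim=reduce_dims)

x2 = torch.randn(4, 64)
x3 = torch.randn(2, 4, 64)
assert dynamic_group_scales(x2, 16).shape == (4, 4)     # (rows, num_groups)
assert dynamic_group_scales(x3, 16).shape == (2, 4, 4)  # (batch, rows, num_groups)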
