
Commit dc36cfa

make generic
Parent: b593884

3 files changed (+34, -66 lines)


src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -60,7 +60,7 @@ def compression_param_names(self) -> Tuple[str]:
             "weight_zero_point",
             "weight_global_scale",
         )
-
+
     def compression_param_info(
         self,
         weight_shape: torch.Size,
@@ -75,11 +75,13 @@ def compression_param_info(
         :return: dictionary mapping compressed parameter names to shape and dtype
         """
         output = {
-            "weight_packed": (torch.Size((weight_shape[0], weight_shape[1] // 2)), torch.uint8),
+            "weight_packed": (
+                torch.Size((weight_shape[0], weight_shape[1] // 2)),
+                torch.uint8,
+            ),
         }
         return output
 
-
     def compress_weight(
         self,
         weight: Tensor,
```
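The reformatting above is cosmetic, but the shape arithmetic it wraps is the packing contract for NVFP4: two 4-bit values share one uint8 byte, so the packed weight has half as many columns as the original. A minimal sketch with a hypothetical weight shape (not the library's API):

```python
import torch

# Hypothetical (out_features, in_features) weight shape; any even column
# count works, since two FP4 values pack into each uint8 byte.
weight_shape = torch.Size((128, 256))

# Mirrors the "weight_packed" entry above: same rows, half the columns, uint8.
packed_shape = torch.Size((weight_shape[0], weight_shape[1] // 2))
print(packed_shape)  # torch.Size([128, 128])
```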

src/compressed_tensors/quantization/lifecycle/forward.py

Lines changed: 19 additions & 41 deletions
```diff
@@ -112,17 +112,21 @@ def dequantize(
             if scale.shape[1] == 1:
                 args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
             # Scale height matches input or is 1 -> group quantization across columns
-            #
+            #
             # Example 1: scale.shape[0] == 1
             #   x_q: (4, 8), scale: (1, 4) -> 2 columns per group
             #
-            # Example 2: scale.shape[0] == x_q.shape[0]
+            # Example 2: scale.shape[0] == x_q.shape[0]
             #   x_q: (4, 8), scale: (4, 4) -> 2 elements per group (per row)
             elif (scale.shape[0] == 1) or (scale.shape[0] == x_q.shape[0]):
                 group_size = int(x_q.shape[1] / scale.shape[1])
-                args = QuantizationArgs(strategy=QuantizationStrategy.GROUP, group_size=group_size)
+                args = QuantizationArgs(
+                    strategy=QuantizationStrategy.GROUP, group_size=group_size
+                )
             else:
-                args = QuantizationArgs(strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape)
+                args = QuantizationArgs(
+                    strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
+                )
         else:
             raise ValueError(
                 f"Could not infer a quantization strategy from scale with {scale.ndim} "
```
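The strategy inference in this hunk can be read straight off the shapes. A self-contained sketch of the rule (standalone code, not the library's; strategies returned as plain strings for illustration):

```python
import torch

def infer_strategy(x_q: torch.Tensor, scale: torch.Tensor) -> str:
    # Mirrors the 2-D branch above: one scale per row -> channel;
    # scale height of 1 or matching x_q -> group; anything else -> block.
    if scale.shape[1] == 1:
        return "channel"
    if scale.shape[0] in (1, x_q.shape[0]):
        group_size = x_q.shape[1] // scale.shape[1]
        return f"group(group_size={group_size})"
    return f"block(block_structure={tuple(scale.shape)})"

x_q = torch.zeros(4, 8, dtype=torch.int8)
print(infer_strategy(x_q, torch.ones(4, 1)))  # channel
print(infer_strategy(x_q, torch.ones(1, 4)))  # group(group_size=2), Example 1
print(infer_strategy(x_q, torch.ones(4, 4)))  # group(group_size=2), Example 2
print(infer_strategy(x_q, torch.ones(2, 4)))  # block(block_structure=(2, 4))
```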
```diff
@@ -253,11 +257,7 @@ def _process_quantization(
         QuantizationStrategy.GROUP,
         QuantizationStrategy.TENSOR_GROUP,
     ):
-        """
-        n_dims = x.shape
-        if len(n_dims) > 2:
-            x = x.squeeze(0)
-        """
+
         output_dtype = dtype if dtype is not None else x.dtype
         output = torch.zeros_like(x).to(output_dtype)
         columns = output.shape[-1]
```
```diff
@@ -290,25 +290,12 @@ def _process_quantization(
             perm = torch.argsort(g_idx)
             x = safe_permute(x, perm, dim=1)
 
-        if len(x.shape) > 2:
-            x = torch.reshape(
-                x,
-                (
-                    x.shape[0],
-                    x.shape[1],
-                    ceil(x.shape[-1] / group_size),
-                    group_size,
-                ),
-            )
-        else:
-            x = torch.reshape(
-                x,
-                (
-                    x.shape[0],
-                    ceil(x.shape[-1] / group_size),
-                    group_size,
-                ),
-            )
+        # Maintain all dimensions apart from the last dim, which is divided by the group_size
+        reshaped_dims = tuple(x.shape[:-1]) + (
+            ceil(x.shape[-1] / group_size),
+            group_size,
+        )
+        x = torch.reshape(x, reshaped_dims)
 
         if do_quantize:
             output = _quantize(
```
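This hunk is the heart of the "make generic" change: rather than branching on tensor rank, keep all leading dimensions and split only the last axis into (n_groups, group_size). A minimal sketch showing a 2-D and a 3-D input taking the same path (hypothetical shapes):

```python
from math import ceil
import torch

group_size = 4
for shape in [(8, 16), (2, 8, 16)]:  # hypothetical 2-D and 3-D inputs
    x = torch.randn(shape)
    # Same expression as the diff: leading dims untouched, last dim split.
    reshaped_dims = tuple(x.shape[:-1]) + (ceil(x.shape[-1] / group_size), group_size)
    grouped = torch.reshape(x, reshaped_dims)
    print(shape, "->", tuple(grouped.shape))
    # (8, 16) -> (8, 4, 4) and (2, 8, 16) -> (2, 8, 4, 4)
```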
```diff
@@ -331,25 +318,16 @@ def _process_quantization(
                 global_scale=global_scale,
             )
 
-        if len(x.shape) > 3:
-            output = torch.reshape(
-                output,
-                (output.shape[0], output.shape[1], output.shape[-1] * output.shape[-2]),
-            )
-        else:
-            output = torch.reshape(
-                output,
-                (output.shape[0], output.shape[-1] * output.shape[-2]),
-            )
+        original_shaped_dims = tuple(output.shape[:-2]) + (
+            output.shape[-1] * output.shape[-2],
+        )
+        output = torch.reshape(output, original_shaped_dims)
 
         output = output.to(output_dtype)
 
         if not is_column_order:
             output = safe_permute(output, torch.argsort(perm), dim=1)
 
-        #if len(n_dims) > 2:
-        #    output = output.unsqueeze(0)
-
     else:  # covers channel, token and tensor strategies
         if do_quantize:
             output = _quantize(
```
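The restore step is generic in the same way: merge the trailing (n_groups, group_size) pair back into a single axis, leaving every leading dimension alone, which undoes the grouping reshape exactly. A short sketch (hypothetical shapes):

```python
import torch

output = torch.randn(2, 8, 4, 4)  # hypothetical grouped tensor from the step above

# Same expression as the diff: collapse the last two axes into one.
original_shaped_dims = tuple(output.shape[:-2]) + (output.shape[-1] * output.shape[-2],)
restored = torch.reshape(output, original_shaped_dims)
print(tuple(restored.shape))  # (2, 8, 16), the pre-grouping shape
```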

src/compressed_tensors/quantization/utils/helpers.py

Lines changed: 10 additions & 22 deletions
```diff
@@ -175,35 +175,23 @@ def compute_dynamic_scales_and_zp(
         QuantizationStrategy.TENSOR_GROUP,
         QuantizationStrategy.GROUP,
     ):
-        #if len(value.shape) > 2:
-        #    value = value.squeeze(0)
+
         if len(value.shape) > 2:
             dim = {0, 1, 2}
         else:
             dim = {0, 1}
 
-        reduce_dims = tuple(idx for idx in range(len(value.shape) + 1) if idx not in dim)
+        reduce_dims = tuple(
+            idx for idx in range(len(value.shape) + 1) if idx not in dim
+        )
         keep_dims = False
 
-        if len(value.shape) > 2:
-            value = torch.reshape(
-                value,
-                (
-                    value.shape[0],
-                    value.shape[1],
-                    math.ceil(value.shape[-1] / args.group_size),
-                    args.group_size,
-                ),
-            )
-        else:
-            value = torch.reshape(
-                value,
-                (
-                    value.shape[0],
-                    math.ceil(value.shape[-1] / args.group_size),
-                    args.group_size,
-                ),
-            )
+        reshaped_dims = tuple(value.shape[:-1]) + (
+            math.ceil(value.shape[-1] / args.group_size),
+            args.group_size,
+        )
+        value = torch.reshape(value, reshaped_dims)
+
     else:
         supported_strategies = (
             QuantizationStrategy.TOKEN,
```
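Reading aid for the reduce_dims logic: value gains one axis after the grouping reshape, so the generator ranges over len(value.shape) + 1 axes and keeps those in dim, leaving only the trailing group_size axis to be reduced, which yields one statistic per group. A sketch under assumed shapes; the amax call stands in for whichever reduction the helper actually applies:

```python
import math
import torch

value = torch.randn(8, 16)  # hypothetical 2-D input
group_size = 4
dim = {0, 1}  # a 3-D input would use {0, 1, 2}

# One extra axis will exist after reshaping, hence len(value.shape) + 1.
reduce_dims = tuple(i for i in range(len(value.shape) + 1) if i not in dim)
print(reduce_dims)  # (2,)

grouped = value.reshape(8, math.ceil(16 / group_size), group_size)
per_group_max = grouped.amax(dim=reduce_dims)  # one statistic per group
print(per_group_max.shape)  # torch.Size([8, 4])
```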
