[mxfp/easy] handle an empty tensor in downcast_to_mxfp (#7579)

jongsoo-openai · web-flow · commit 7708b9c12a98 · 2025-07-21T16:08:47.000+01:00
Fix ``` > kernel_src_tensor = src_tensor.reshape(-1, src_tensor.shape[-1]) E RuntimeError: cannot reshape tensor of 0 elements into shape [-1, 0] because the unspecified dimension size -1 can be any value and is ambiguous ```  # New contributor declaration - [x] I am not making a trivial change, such as fixing a typo in a comment. - [x] I have written a PR description following these [rules](https://cbea.ms/git-commit/#why-not-how). - [x] I have run `pre-commit run --from-ref origin/main --to-ref HEAD`. - Select one of the following. - [x] I have added tests. - `/test` for `lit` tests - `/unittest` for C++ tests - `/python/test` for end-to-end tests - [ ] This PR does not need a test because `FILL THIS IN`. - Select one of the following. - [x] I have not added any `lit` tests. - [ ] The `lit` tests I have added follow these [best practices](https://mlir.llvm.org/getting_started/TestingGuide/#filecheck-best-practices), including the "tests should be minimal" section. (Usually running Python code and using the instructions it generates is not minimal.)
diff --git a/python/triton_kernels/triton_kernels/numerics_details/mxfp.py b/python/triton_kernels/triton_kernels/numerics_details/mxfp.py
@@ -46,19 +46,20 @@ def downcast_to_mxfp(src_tensor: torch.Tensor, out_quant_type: torch.dtype, axis
     out_quant_tensor = src_tensor.new_empty(out_shape, dtype=out_quant_type)
     out_scale = src_tensor.new_empty(out_scale_shape, dtype=torch.uint8)
 
-    kernel_src_tensor = src_tensor.reshape(-1, src_tensor.shape[-1])
-    kernel_quant_tensor = out_quant_tensor.view(-1, out_quant_tensor.shape[-1])
-    kernel_scale = out_scale.view(-1, out_scale.shape[-1])
-
-    BLOCK_OUT_DIM = 128
-    BLOCK_QUANT_DIM = MXFP_BLOCK_SIZE
-    grid_out = triton.cdiv(kernel_src_tensor.shape[0], BLOCK_OUT_DIM)
-    grid_quant = triton.cdiv(kernel_src_tensor.shape[1], BLOCK_QUANT_DIM)
-
-    _downcast_to_mxfp[(grid_out, grid_quant)](kernel_quant_tensor, *kernel_quant_tensor.stride(), kernel_scale,
-                                              *kernel_scale.stride(), kernel_src_tensor, *kernel_src_tensor.stride(),
-                                              *kernel_src_tensor.shape, BLOCK_OUT_DIM, BLOCK_QUANT_DIM,
-                                              DEQUANT_SCALE_ROUNDING_MODE.value, num_warps=8)
+    if src_tensor.numel() > 0:
+        kernel_src_tensor = src_tensor.reshape(-1, src_tensor.shape[-1])
+        kernel_quant_tensor = out_quant_tensor.view(-1, out_quant_tensor.shape[-1])
+        kernel_scale = out_scale.view(-1, out_scale.shape[-1])
+
+        BLOCK_OUT_DIM = 128
+        BLOCK_QUANT_DIM = MXFP_BLOCK_SIZE
+        grid_out = triton.cdiv(kernel_src_tensor.shape[0], BLOCK_OUT_DIM)
+        grid_quant = triton.cdiv(kernel_src_tensor.shape[1], BLOCK_QUANT_DIM)
+
+        _downcast_to_mxfp[(grid_out, grid_quant)](kernel_quant_tensor, *kernel_quant_tensor.stride(), kernel_scale,
+                                                *kernel_scale.stride(), kernel_src_tensor, *kernel_src_tensor.stride(),
+                                                *kernel_src_tensor.shape, BLOCK_OUT_DIM, BLOCK_QUANT_DIM,
+                                                DEQUANT_SCALE_ROUNDING_MODE.value, num_warps=8)
 
     out_quant_tensor = out_quant_tensor.transpose(axis, src_tensor.ndim - 1)
     out_scale = out_scale.transpose(axis, src_tensor.ndim - 1)