Update documentation for _cast_fp4

ajrasane · ajrasane · commit a7730c0ebb36 · 2025-09-05T22:43:25.000Z
Signed-off-by: ajrasane <131806219+ajrasane@users.noreply.github.com>
diff --git a/modelopt/onnx/quantization/qdq_utils.py b/modelopt/onnx/quantization/qdq_utils.py
@@ -612,7 +612,11 @@ def _cast_fp8(array: np.ndarray) -> np.ndarray:
 
 
 def _cast_fp4(array: np.ndarray) -> np.ndarray:
-    """Cast a numpy array to FLOAT4E2M1 using PyTorch."""
+    """Cast a numpy array to FLOAT4E2M1 using PyTorch.
+
+    Note: The first dimension of the array must be divisible by 2
+    as two FP4 values are packed into a single byte.
+    """
     array_f32_t = torch.from_numpy(array)
     array_f32_t_shape = array_f32_t.shape
     assert array_f32_t_shape[0] % 2 == 0, "array_f32_t_shape[0] must be divisible by 2"