Skip to content

Commit 9f81399

Browse files
optimisea authored and facebook-github-bot committed
expose rounding_mode in quantization for performance (#4862)
Summary: X-link: facebookresearch/FBGEMM#1884 Pull Request resolved: #4862 X-link: meta-pytorch/torchrec#3368 Expose the rounding_mode for mx4 as it could impact the QPS. Previous work was done here. D62466094 ``` class RoundingMode(IntEnum): """Rounding options for quantization.""" nearest = 0 floor = 1 even = 2 stochastic = 3 ceil = 4 ``` https://fburl.com/code/8prz4mem Reviewed By: victor-eds Differential Revision: D82001579 fbshipit-source-id: 872cd8ba62292b95e568ece47ac09052f28ca59e
1 parent a0dd77b commit 9f81399

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

fbgemm_gpu/fbgemm_gpu/quantize_comm.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class QuantizationContext:
6666
row_dim: int = ROW_DIM_DEFAULT
6767
row_dim_quant: int = -1
6868
mx_group_size: int = MX_GROUP_SIZE_DEFAULT
69-
rounding_mode: RoundingMode = RoundingMode.even
69+
rounding_mode: Optional[RoundingMode] = RoundingMode.even
7070
padded_dim_sum_per_rank: Optional[List[int]] = None
7171

7272

@@ -167,6 +167,7 @@ def __init__(
167167
loss_scale: Optional[float] = None,
168168
row_dim: Optional[int] = None,
169169
is_fwd: bool = True,
170+
rounding_mode: Optional[RoundingMode] = None,
170171
) -> None:
171172
if loss_scale is not None:
172173
if comm_precision not in [SparseType.FP16, SparseType.BF16]:
@@ -183,8 +184,12 @@ def __init__(
183184
self._loss_scale = loss_scale
184185
self._is_fwd = is_fwd
185186
self._row_dim: int = -1 if row_dim is None else row_dim
187+
self._rounding_mode: Optional[RoundingMode] = rounding_mode
186188
if self._comm_precision == SparseType.MX4:
187189
self._row_dim = MX_GROUP_SIZE_DEFAULT if row_dim is None else row_dim
190+
self._rounding_mode = (
191+
RoundingMode.even if rounding_mode is None else rounding_mode
192+
)
188193

189194
def encode(
190195
self, input_tensor: torch.Tensor, ctx: Optional[QuantizationContext] = None
@@ -258,7 +263,9 @@ def create_context(self) -> Optional[QuantizationContext]:
258263
return QuantizationContext(self._row_dim)
259264
if self._comm_precision == SparseType.MX4:
260265
return QuantizationContext(
261-
row_dim=self._row_dim, mx_group_size=self._row_dim
266+
row_dim=self._row_dim,
267+
mx_group_size=self._row_dim,
268+
rounding_mode=self._rounding_mode,
262269
)
263270
# int8 rowwise is default
264271
return QuantizationContext()

0 commit comments

Comments (0)