Commits
27 commits
a42f1eb
WIP
nikita-savelyevv Jun 17, 2025
17a3aec
Merge branch 'develop' into ns/ov-f4e2m1-support
nikita-savelyevv Jun 23, 2025
b2e090c
Add round to nearest logic for numpy case
nikita-savelyevv Jul 22, 2025
66c0366
Merge branch 'develop' into ns/ov-f4e2m1-support
nikita-savelyevv Jul 29, 2025
a345984
Tweaks
nikita-savelyevv Jul 29, 2025
6e3ba6e
Temporarily install OV nightly
nikita-savelyevv Jul 29, 2025
7555794
Update src/nncf/version.py
nikita-savelyevv Aug 21, 2025
999d54f
Merge branch 'develop' into ns/ov-f4e2m1-support
nikita-savelyevv Sep 3, 2025
83770e1
[OpenVINO][WC] E5M2 and E4M3 FP8 weights compression support
daniil-lyakhov Sep 22, 2025
8054217
MXFP4/MXFP8_E4M3
daniil-lyakhov Sep 30, 2025
3d944de
Expand wc docs with a table
daniil-lyakhov Oct 1, 2025
0c48792
Codebook is removed from wc docs
daniil-lyakhov Oct 1, 2025
ac2f05c
Type
daniil-lyakhov Oct 1, 2025
1e23ecf
Apply suggestions from code review
daniil-lyakhov Oct 2, 2025
33aae33
Typos/pre-commit
daniil-lyakhov Oct 2, 2025
e4d47ab
Fix adjust group size
daniil-lyakhov Oct 7, 2025
2aaec38
Revert "Fix adjust group size"
daniil-lyakhov Oct 8, 2025
ab6aa74
Fail for MX with adjust fallback mode
daniil-lyakhov Oct 8, 2025
a25b5c3
Update src/nncf/quantization/algorithms/weight_compression/weight_low…
daniil-lyakhov Oct 8, 2025
a64b30c
Merge branch 'develop' into ns/ov-f4e2m1-support
nikita-savelyevv Oct 8, 2025
83d09fc
Merge branch 'dl/FP8' into ns/ov-f4e2m1-support
nikita-savelyevv Oct 8, 2025
c227bac
Merge branch 'develop' into ns/ov-f4e2m1-support
nikita-savelyevv Oct 8, 2025
c026573
Revert nightly installation
nikita-savelyevv Oct 8, 2025
a000f87
Post-merge fixes
nikita-savelyevv Oct 8, 2025
831bf25
Post-merge fixes part 2
nikita-savelyevv Oct 8, 2025
344b94b
Increase test weight channel size
nikita-savelyevv Oct 8, 2025
089631a
Address suggested changes
nikita-savelyevv Oct 13, 2025
2 changes: 2 additions & 0 deletions .github/workflows/call_precommit.yml
@@ -91,6 +91,8 @@ jobs:
shell: bash
- name: Install NNCF and test requirements
run: pip install . -r tests/openvino/requirements.txt
- name: Install OpenVINO nightly
run: pip install -U --pre openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
- name: Print installed modules
run: pip list
- name: Run OV precommit test scope
2 changes: 2 additions & 0 deletions src/nncf/openvino/graph/nncf_graph_builder.py
@@ -44,6 +44,8 @@ def convert_to_nncf_dtype(ov_type: ov.Type) -> Dtype:
type_name = ov_type.get_type_name()
conversion_map = {
"nf4": "float",
"f4e2m1": "float",
"f8e8m0": "float",
"f8e4m3": "float",
"f8e5m2": "float",
"f16": "float",
13 changes: 7 additions & 6 deletions src/nncf/openvino/optimized_functions/functions.py
@@ -107,17 +107,17 @@ def do_float_quantization(
precomputed_scale: Optional[Tensor] = None,
) -> tuple[Tensor, Tensor, Tensor]:
"""
Computes quantization scale if not provided, and performs corresponding nf4 weight quantization.
Computes quantization scale if not provided, and performs corresponding float weight quantization.
For NF4 quantization quantizes the weights to 16 levels on [-1, 1] interval.
TODO(nikita-savelyevv): add support for E2M1 once ticket 164851 is resolved
For E2M1 quantization quantizes the weights to 16 levels on [-6, 6] interval.

:param weight: Weight array to compress.
:param config: Weight compression configuration.
:param reduction_axes: Axes, along which to reduce (collect) different statistics.
:param precomputed_scale: Optional precomputed scale.
:return: Returns quantized (for e2m1 normalized) weight tensor and corresponding scale tensor.
:return: Returns quantized weight tensor and corresponding scale tensor.
"""
assert config.mode == CompressWeightsMode.NF4
assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]

weight_shape = weight.shape
scale_shape = None if precomputed_scale is None else precomputed_scale.shape
@@ -129,7 +129,8 @@ def do_float_quantization(
if weight.backend == TensorBackend.ov:
# Return ov tensors in target precision to seamlessly insert them into openvino model later
ov_model_params.return_ov_tensors = True
ov_model_params.output_dtypes.update({"compressed_weight": TensorDataType.nf4})
weight_dtype = TensorDataType.f4e2m1 if config.mode == CompressWeightsMode.E2M1 else TensorDataType.nf4
ov_model_params.output_dtypes.update({"compressed_weight": weight_dtype})

model = get_float_quantization_model(
ov_model_params,
@@ -235,7 +236,7 @@ def float_quantize_dequantize_weight(
:param return_compressed_weight: If True, besides decompressed weight will also return compressed weight and scale.
:return: Dequantized weight tensor or a tuple containing the decompressed weight, compressed weight and scale.
"""
assert config.mode == CompressWeightsMode.NF4
assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]

# When reduction axes are not provided, assuming that the weights are already reshaped
if config.group_size != -1 and reduction_axes is not None:
18 changes: 13 additions & 5 deletions src/nncf/openvino/optimized_functions/models.py
@@ -284,7 +284,8 @@ def get_float_quantization_model(
reduction_axes: Optional[ReductionAxes] = None,
) -> Union[ModelCallable, ModelAsNodes]:
"""
Get a model that compresses weights to float (currently only nf4) destination type using the given configuration.
Get a model that compresses weights to float (currently nf4 or f4e2m1) destination type using the given
configuration.

:param ov_model_params: OV model parameters.
:param config: Compression configuration.
@@ -569,7 +570,7 @@ def _build_float_quantization_model(
reduction_axes: Optional[ReductionAxes] = None,
return_nodes: bool = False,
) -> Union[ModelCallable, ModelAsNodes]:
assert config.mode == CompressWeightsMode.NF4
assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]

default_input_dtypes = {"scale": TensorDataType.float32}
default_output_dtypes = {"compressed_weight": TensorDataType.float32, "scale": TensorDataType.float32}
@@ -595,8 +596,7 @@
)

# Validate output dtypes
# TODO: add support for f4e2m1 once ticket 164851 is resolved
valid_compressed_weight_dtypes = [TensorDataType.float32, TensorDataType.nf4]
valid_compressed_weight_dtypes = [TensorDataType.float32, TensorDataType.nf4, TensorDataType.f4e2m1]
if compressed_weight_dtype not in valid_compressed_weight_dtypes:
msg = (
f"Compressed weight must be one of the following data types: {valid_compressed_weight_dtypes}. "
@@ -624,8 +624,16 @@
eps = np.finfo(np.float32).eps
scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale)

if config.mode == CompressWeightsMode.E2M1:
scale = scale / opset.constant(6.0, ov.Type.f32)
scale = opset.log(scale) / opset.log(opset.constant(2.0, ov.Type.f32))
scale = opset.ceil(scale)
scale = opset.clamp(scale, -127.0, 127.0)
scale = opset.power(opset.constant(2.0, ov.Type.f32), scale)

compressed_weight = divide_op(weight, scale)
compressed_weight = convert_op(compressed_weight, ov.Type.nf4)
target_dtype = ov.Type.nf4 if config.mode == CompressWeightsMode.NF4 else ov.Type.f4e2m1
compressed_weight = convert_op(compressed_weight, target_dtype)
compressed_weight = convert_op(compressed_weight, DTYPE_MAP_OV[compressed_weight_dtype])

ov_results = [compressed_weight]
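For reference, a minimal NumPy sketch of the E2M1 scale handling built above (illustrative only; the helper name and standalone form are not part of the PR): the per-group scale is divided by 6.0, the largest representable E2M1 magnitude, its exponent is rounded up and clamped to the E8M0 range, and the result is a power-of-two shared scale.

import numpy as np

def e2m1_shared_scale(group_abs_max: np.ndarray) -> np.ndarray:
    # group_abs_max: per-group maximum absolute weight value, float32.
    eps = np.finfo(np.float32).eps
    scale = np.where(np.abs(group_abs_max) < eps, eps, group_abs_max)
    # Fit the scaled weights into the representable E2M1 range [-6, 6].
    scale = scale / 6.0
    # Round the exponent up and clamp it to the E8M0 range [-127, 127].
    exponent = np.clip(np.ceil(np.log2(scale)), -127.0, 127.0)
    return np.power(2.0, exponent).astype(np.float32)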
src/nncf/quantization/algorithms/weight_compression/constants.py
@@ -33,6 +33,27 @@
dtype=np.float32,
)

E2M1_QUANTILES = np.array(
[
-6.0,
-4.0,
-3.0,
-2.0,
-1.5,
-1.0,
-0.5,
-0.0,
0.5,
1.0,
1.5,
2.0,
3.0,
4.0,
6.0,
],
dtype=np.float32,
)


CB4_QUANTILES = np.array(
[
@@ -77,3 +98,6 @@
],
dtype=np.float32,
)


CENTER_OF_E2M1_QUANTILES = (E2M1_QUANTILES[1:] + E2M1_QUANTILES[:-1]) / 2
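A hedged illustration of how these tables are used downstream (the helper name is made up): searching the midpoints gives, for every already-scaled weight value, the index of the closest representable E2M1 value; exact midpoint ties are resolved separately by the round-to-nearest-even logic in `_calculate_float_quantized_weight` shown further below.

import numpy as np

E2M1_QUANTILES = np.array(
    [-6.0, -4.0, -3.0, -2.0, -1.5, -1.0, -0.5, -0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0],
    dtype=np.float32,
)
CENTER_OF_E2M1_QUANTILES = (E2M1_QUANTILES[1:] + E2M1_QUANTILES[:-1]) / 2

def snap_to_e2m1(norm_weight: np.ndarray) -> np.ndarray:
    # Index of the nearest E2M1 quantile for each normalized weight value.
    indexes = np.searchsorted(CENTER_OF_E2M1_QUANTILES, norm_weight)
    return E2M1_QUANTILES[indexes]

snap_to_e2m1(np.array([0.7, -2.4, 5.3], dtype=np.float32))  # nearest E2M1 values: 0.5, -2.0, 6.0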
src/nncf/quantization/algorithms/weight_compression/weight_lowering.py
@@ -19,7 +19,9 @@
from nncf.errors import UnsupportedModelError
from nncf.parameters import CompressWeightsMode
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.quantization.algorithms.weight_compression.constants import CENTER_OF_E2M1_QUANTILES
from nncf.quantization.algorithms.weight_compression.constants import CENTER_OF_NF4_QUANTILES
from nncf.quantization.algorithms.weight_compression.constants import E2M1_QUANTILES
from nncf.quantization.algorithms.weight_compression.constants import NF4_QUANTILES
from nncf.quantization.algorithms.weight_compression.parameters import CompressedWeight
from nncf.quantization.fake_quantize import calculate_scale_zero_point
@@ -136,14 +138,14 @@ def do_float_quantization(
"""
Computes quantization scale if not provided, and performs corresponding (nf4, e2m1) weight quantization.
For NF4 quantization quantizes the weights to 16 levels on [-1, 1] interval.
For E2M1 and CODEBOOK currently returns normalized weight without quantization.
TODO(nikita-savelyevv): add support for E2M1 once ticket 164851 is resolved
For E2M1 quantization quantizes the weights to 16 levels on [-6, 6] interval.
For CODEBOOK currently returns normalized weight without quantization.

:param weight: Weight array to compress.
:param config: Weight compression configuration.
:param reduction_axes: Axes, along which to reduce (collect) different statistics.
:param precomputed_scale: Optional precomputed scale.
:return: Returns quantized (for e2m1 normalized) weight tensor and corresponding scale tensor and
:return: Returns quantized (for codebook normalized) weight tensor and corresponding scale tensor and
optional indexes for codebook.
"""
assert not config.is_integer
@@ -153,7 +155,7 @@ def do_float_quantization(
weight, reduction_axes = reshape_weight_for_grouped_quantization(weight, reduction_axes, config.group_size)

# Optimized implementation
if config.mode == CompressWeightsMode.NF4 and _can_run_optimized(weight):
if _can_run_optimized(weight):
from nncf.openvino.optimized_functions import do_float_quantization as do_float_quantization_ov

return do_float_quantization_ov(weight, config, reduction_axes, precomputed_scale)
@@ -168,20 +170,18 @@ def do_float_quantization(
if scale is None:
scale = calculate_float_quantization_params(weight, reduction_axes, config)
norm_weight = _calculate_normalized_weight(weight, scale)
if config.mode == CompressWeightsMode.NF4:
if original_weight_backend == TensorBackend.ov:
# Can convert through OpenVINO and return OpenVINO-native NF4 tensor
compressed_weight = norm_weight.as_openvino_tensor().astype(TensorDataType.nf4)
else:
compressed_weight = _calculate_nf4_quantized_weight(norm_weight)
elif config.is_codebook:
if config.is_codebook:
compressed_weight, indexes = _calculate_codebook_quantized_weight(
norm_weight, quantiles=config.get_numpy_codebook()
)
return compressed_weight, scale, indexes

if original_weight_backend == TensorBackend.ov:
# Can convert through OpenVINO and return OpenVINO-native tensor
target_dtype = TensorDataType.nf4 if config.mode == CompressWeightsMode.NF4 else TensorDataType.f4e2m1
compressed_weight = norm_weight.as_openvino_tensor().astype(target_dtype)
else:
# TODO(nikita-savelyevv): add support for E2M1 once ticket 164851 is resolved
compressed_weight = norm_weight
compressed_weight = _calculate_float_quantized_weight(norm_weight, config.mode)
return compressed_weight, scale, None


@@ -193,8 +193,7 @@ def float_quantize_dequantize_weight(
return_compressed_weight: Optional[bool] = False,
) -> Union[Tensor, tuple[Tensor, Tensor, Tensor]]:
"""
First quantizes the given weight tensor to float (nf4) dtype and then dequantizes it back to obtain float32 values.
E2M1 mode is currently not supported.
First quantizes the given weight tensor to float dtype and then dequantizes it back to obtain float32 values.

:param weight: The weight tensor to quantize-dequantize.
:param config: Compression configuration.
@@ -203,11 +202,15 @@ def float_quantize_dequantize_weight(
:param return_compressed_weight: If True, besides decompressed weight will also return compressed weight and scale.
:return: Dequantized weight tensor or a tuple containing the decompressed weight, compressed weight and scale.
"""
assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]
# TODO(nikita-savelyevv): add support for f4e2m1 once ticket 164851 is resolved
assert config.mode in [
CompressWeightsMode.NF4,
CompressWeightsMode.E2M1,
CompressWeightsMode.CODEBOOK,
CompressWeightsMode.CB4_F8E4M3,
]

# Optimized implementation
if config.mode == CompressWeightsMode.NF4 and _can_run_optimized(weight):
if config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1] and _can_run_optimized(weight):
from nncf.openvino.optimized_functions import (
float_quantize_dequantize_weight as float_quantize_dequantize_weight_ov,
)
@@ -496,17 +499,30 @@ def integer_quantize_dequantize_weight(
return decompressed_weight


def _calculate_nf4_quantized_weight(norm_weight: Tensor) -> Tensor:
def _calculate_float_quantized_weight(norm_weight: Tensor, mode: CompressWeightsMode) -> Tensor:
"""
Performs NF4 quantization. Look-up table is used to "round" or "quantize" to the closest quant.
Performs float (currently NF4 or F4E2M1) quantization. Look-up table is used to "round" or "quantize" to the
closest quant.

:param norm_weight: Weight tensor to quantize already normalized to [-1, 1] range.
:return: Tensor with floating-point values, where each of them corresponds to 1 out of 16 quants on [-1, 1].
:param norm_weight: Normalized weight tensor to quantize.
:return: Tensor with floating-point values, where each of them corresponds to 1 out of 16 quants.
"""
center_nf4_quantiles = fns.from_numpy(CENTER_OF_NF4_QUANTILES, backend=norm_weight.backend)
indexes = fns.searchsorted(center_nf4_quantiles, norm_weight)
nf4_quantiles = fns.from_numpy(NF4_QUANTILES, backend=indexes.backend)
quantized_weight = nf4_quantiles[indexes]
assert mode in [CompressWeightsMode.NF4, CompressWeightsMode.E2M1]
quantiles_np = NF4_QUANTILES if mode == CompressWeightsMode.NF4 else E2M1_QUANTILES
quantile_centers_np = CENTER_OF_NF4_QUANTILES if mode == CompressWeightsMode.NF4 else CENTER_OF_E2M1_QUANTILES
quantile_centers = fns.from_numpy(quantile_centers_np, backend=norm_weight.backend)
indexes = fns.searchsorted(quantile_centers, norm_weight)
quantiles = fns.from_numpy(quantiles_np, backend=indexes.backend)

if mode == CompressWeightsMode.E2M1:
# If in-between two quantiles, round to the nearest even quantile.
shifted_indexes = fns.clip(indexes + 1, 0, quantiles.size - 1)
dist_left = fns.abs(norm_weight - quantiles[indexes])
dist_right = fns.abs(norm_weight - quantiles[shifted_indexes])
choose_right = (dist_right < dist_left) | ((dist_left == dist_right) & ((shifted_indexes + 1) % 2 == 0))
indexes = fns.where(choose_right, shifted_indexes, indexes)

quantized_weight = quantiles[indexes]
return quantized_weight


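A compact NumPy sketch of the tie-breaking added in `_calculate_float_quantized_weight` above (the standalone form and names are illustrative): the index from `searchsorted` is compared with its right neighbour, and on an exact midpoint tie the PR's parity rule decides which quantile to keep.

import numpy as np

def round_to_e2m1(norm_weight, quantiles, quantile_centers):
    # Candidate index of the nearest quantile via the precomputed midpoints.
    indexes = np.searchsorted(quantile_centers, norm_weight)
    shifted = np.clip(indexes + 1, 0, quantiles.size - 1)
    dist_left = np.abs(norm_weight - quantiles[indexes])
    dist_right = np.abs(norm_weight - quantiles[shifted])
    # Move to the right neighbour if it is strictly closer, or on an exact
    # tie when the parity condition from the PR selects it ("round to even").
    choose_right = (dist_right < dist_left) | ((dist_left == dist_right) & ((shifted + 1) % 2 == 0))
    return quantiles[np.where(choose_right, shifted, indexes)]

With the E2M1 tables from constants.py, a value exactly halfway between two quantiles, such as 0.75, resolves to 1.0 under this rule, whereas plain `searchsorted` alone would keep 0.5.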
2 changes: 2 additions & 0 deletions src/nncf/tensor/definitions.py
@@ -44,6 +44,8 @@ class TensorDataType(StrEnum):
float64 = auto()
f8e4m3 = auto()
f8e5m2 = auto()
f8e8m0 = auto()
f4e2m1 = auto()
nf4 = auto()
int8 = auto()
int32 = auto()
6 changes: 5 additions & 1 deletion src/nncf/tensor/functions/openvino_numeric.py
@@ -23,6 +23,8 @@

DTYPE_MAP: dict[TensorDataType, ov.Type] = {
TensorDataType.nf4: ov.Type.nf4,
TensorDataType.f4e2m1: ov.Type.f4e2m1,
TensorDataType.f8e8m0: ov.Type.f8e8m0,
TensorDataType.f8e4m3: ov.Type.f8e4m3,
TensorDataType.f8e5m2: ov.Type.f8e5m2,
TensorDataType.float16: ov.Type.f16,
@@ -42,6 +44,8 @@
TensorDataType.int4,
TensorDataType.uint4,
TensorDataType.nf4,
TensorDataType.f4e2m1,
TensorDataType.f8e8m0,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
]
@@ -95,7 +99,7 @@ def _(a: ov.Tensor, shape: Union[int, tuple[int, ...]]) -> ov.Tensor:

@numeric.as_numpy_tensor.register
def _(a: ov.Tensor) -> NDArray[Any]:
# Cannot convert bfloat16, uint4, int4, nf4, f8e4m3, f8e5m2 to numpy directly
# Cannot convert bfloat16, uint4, int4, nf4, f4e2m1, f8e8m0, f8e4m3, f8e5m2 to numpy directly
a_dtype = DTYPE_MAP_REV[a.get_element_type()]
if a_dtype in NATIVE_OV_CAST_DTYPES:
dtype = TensorDataType.float32
9 changes: 9 additions & 0 deletions src/nncf/tensor/tensor.py
@@ -84,6 +84,12 @@ def __len__(self) -> int:

# built-in operations

def __or__(self, other: Union[Tensor, T_NUMBER]) -> Tensor:
return Tensor(self.data | unwrap_tensor_data(other))

def __and__(self, other: Union[Tensor, T_NUMBER]) -> Tensor:
return Tensor(self.data & unwrap_tensor_data(other))

def __add__(self, other: Union[Tensor, T_NUMBER]) -> Tensor:
return Tensor(self.data + unwrap_tensor_data(other))

@@ -144,6 +150,9 @@ def __ifloordiv__(self, other: Union[Tensor, T_NUMBER]) -> Tensor:
self._data //= unwrap_tensor_data(other)
return self

def __mod__(self, other: Union[Tensor, T_NUMBER]) -> Tensor:
return cast(Tensor, _call_function("_binary_op_nowarn", self, other, operator.mod))

def __matmul__(self, other: Union[Tensor, T_NUMBER]) -> Tensor:
return Tensor(self.data @ unwrap_tensor_data(other))

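A small usage sketch for the operators added above (assuming the NumPy backend and the public `nncf.tensor.Tensor` import path; illustrative, not taken from the PR):

import numpy as np
from nncf.tensor import Tensor

flags_a = Tensor(np.array([True, False]))
flags_b = Tensor(np.array([True, True]))

print((flags_a | flags_b).data)  # [ True  True]
print((flags_a & flags_b).data)  # [ True False]
print((Tensor(np.array([5, 7])) % 3).data)  # [2 1]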
2 changes: 1 addition & 1 deletion src/nncf/version.py
@@ -9,7 +9,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "2.18.0"
__version__ = "2.18.0.dev0+66c0366aedirty"


BKC_TORCH_SPEC = "==2.7.*"
25 changes: 25 additions & 0 deletions tests/cross_fw/test_templates/template_test_nncf_tensor.py
@@ -48,6 +48,8 @@
}
BINARY_OPERATORS = ["add", "sub", "pow", "mul", "truediv", "floordiv"]

BOOLEAN_OPERATOR_MAP = {"and": operator.and_, "or": operator.or_}

COMPARISON_OPERATOR_MAP = {
"lt": operator.lt,
"le": operator.le,
@@ -98,6 +100,25 @@ def test_operator_clone(self):
assert id(tensor_a.data) is not id(tensor_b.data)
assert all(tensor_a == tensor_b)

@pytest.mark.parametrize("op_name", BOOLEAN_OPERATOR_MAP.keys())
@pytest.mark.parametrize("value", [True, False])
def test_operators_bool(self, op_name, value):
tensor_a = self.to_tensor([True, False])

nncf_tensor_a = Tensor(tensor_a)

fn = BOOLEAN_OPERATOR_MAP[op_name]
res = fn(tensor_a, value)
res_nncf = fn(nncf_tensor_a, value)

assert res.dtype == res_nncf.data.dtype
assert all(res == res_nncf.data)
assert isinstance(res_nncf, Tensor)
if (
self.backend() != TensorBackend.tf
): # native Tensorflow operaors do not guarantee to return a tensor on an initial device.
Review comment (Contributor). Suggested change:
): # native Tensorflow operaors do not guarantee to return a tensor on an initial device.
): # native Tensorflow operators do not guarantee to return a tensor on an initial device.

Reply (Collaborator, Author): Done
assert res_nncf.device == nncf_tensor_a.device

@pytest.mark.parametrize("op_name", OPERATOR_MAP.keys())
def test_operators_tensor(self, op_name):
tensor_a = self.to_tensor([1.0, 2.0])
@@ -1982,6 +2003,8 @@ def test_fn_zeros(self):
TensorDataType.int4,
TensorDataType.uint4,
TensorDataType.nf4,
TensorDataType.f4e2m1,
TensorDataType.f8e8m0,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
]
@@ -2014,6 +2037,8 @@ def test_fn_eye(self, n, m, ref):
TensorDataType.int4,
TensorDataType.uint4,
TensorDataType.nf4,
TensorDataType.f4e2m1,
TensorDataType.f8e8m0,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
]