
Commit b7d8a7b

Initial commit
1 parent e9e7cd0 commit b7d8a7b

10 files changed: +270 -97 lines changed

src/nncf/openvino/optimized_functions/functions.py

Lines changed: 3 additions & 4 deletions
@@ -116,7 +116,7 @@ def do_float_quantization(
     :param precomputed_scale: Optional precomputed scale.
     :return: Returns quantized weight tensor and corresponding scale tensor.
     """
-    assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.MXFP4, CompressWeightsMode.FP4]
+    assert config.mode not in [CompressWeightsMode.CB4_F8E4M3, CompressWeightsMode.CODEBOOK]

     weight_shape = weight.shape
     scale_shape = None if precomputed_scale is None else precomputed_scale.shape
@@ -128,8 +128,7 @@ def do_float_quantization(
     if weight.backend == TensorBackend.ov:
         # Return ov tensors in target precision to seamlessly insert them into openvino model later
         ov_model_params.return_ov_tensors = True
-        weight_dtype = TensorDataType.nf4 if config.mode == CompressWeightsMode.NF4 else TensorDataType.f4e2m1
-        ov_model_params.output_dtypes.update({"compressed_weight": weight_dtype})
+        ov_model_params.output_dtypes.update({"compressed_weight": config.compression_dtype})

     model = get_float_quantization_model(
         ov_model_params,
@@ -234,7 +233,7 @@ def float_quantize_dequantize_weight(
     :param return_compressed_weight: If True, besides decompressed weight will also return compressed weight and scale.
     :return: Dequantized weight tensor or a tuple containing the decompressed weight, compressed weight and scale.
     """
-    assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.MXFP4, CompressWeightsMode.FP4]
+    assert config.mode not in [CompressWeightsMode.CB4_F8E4M3, CompressWeightsMode.CODEBOOK]

     # When reduction axes are not provided, assuming that the weights are already reshaped
     if config.group_size != -1 and reduction_axes is not None:
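
Note: the guard flips from an allow-list to a deny-list, so the two FP8 modes now pass through do_float_quantization and float_quantize_dequantize_weight without further changes to these entry points. A minimal sketch of the mode sets involved (mode names are taken from this diff; the snippet only illustrates the guard and does not call the functions):

from nncf import CompressWeightsMode

old_allowed = {CompressWeightsMode.NF4, CompressWeightsMode.MXFP4, CompressWeightsMode.FP4}
denied = {CompressWeightsMode.CB4_F8E4M3, CompressWeightsMode.CODEBOOK}

# FP8 modes were rejected by the old allow-list but are accepted by the new deny-list
for mode in (CompressWeightsMode.FP8_E4M3, CompressWeightsMode.MXFP8_E4M3):
    assert mode not in old_allowed
    assert mode not in denied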

src/nncf/openvino/optimized_functions/models.py

Lines changed: 12 additions & 12 deletions
@@ -31,6 +31,7 @@
 from nncf.openvino.graph.node_utils import convert_op
 from nncf.openvino.graph.node_utils import non_convertable_divide_op
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
+from nncf.quantization.algorithms.weight_compression.constants import FP_MAX_VALUES
 from nncf.tensor import Tensor
 from nncf.tensor import TensorDataType
 from nncf.tensor.functions.openvino_numeric import DTYPE_MAP as DTYPE_MAP_OV
@@ -571,7 +572,7 @@ def _build_float_quantization_model(
     reduction_axes: Optional[ReductionAxes] = None,
     return_nodes: bool = False,
 ) -> Union[ModelCallable, ModelAsNodes]:
-    assert config.mode in [CompressWeightsMode.NF4, CompressWeightsMode.MXFP4, CompressWeightsMode.FP4]
+    assert config.mode not in [CompressWeightsMode.CB4_F8E4M3, CompressWeightsMode.CODEBOOK]

     default_input_dtypes = {"scale": TensorDataType.float32}
     default_output_dtypes = {"compressed_weight": TensorDataType.float32, "scale": TensorDataType.float32}
@@ -597,7 +598,12 @@ def _build_float_quantization_model(
     )

     # Validate output dtypes
-    valid_compressed_weight_dtypes = [TensorDataType.float32, TensorDataType.nf4, TensorDataType.f4e2m1]
+    valid_compressed_weight_dtypes = [
+        TensorDataType.float32,
+        TensorDataType.nf4,
+        TensorDataType.f4e2m1,
+        TensorDataType.f8e4m3,
+    ]
     if compressed_weight_dtype not in valid_compressed_weight_dtypes:
         msg = (
             f"Compressed weight must be one of the following data types: {valid_compressed_weight_dtypes}. "
@@ -625,23 +631,17 @@ def _build_float_quantization_model(
     eps = np.finfo(np.float32).eps
     scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale)

-    # Equals 1.0 for NF4
-    FP_MAX_VALS = {
-        CompressWeightsMode.MXFP4: 6.0,
-        CompressWeightsMode.FP4: 6.0,
-    }
-    if config.mode in FP_MAX_VALS:
-        scale = divide_op(scale, opset.constant(FP_MAX_VALS[config.mode], ov.Type.f32))
+    if config.compression_dtype != TensorDataType.nf4:
+        scale = divide_op(scale, opset.constant(FP_MAX_VALUES[config.compression_dtype], ov.Type.f32))

-    if config.mode == CompressWeightsMode.MXFP4:
+    if config.mode in [CompressWeightsMode.MXFP4, CompressWeightsMode.MXFP8_E4M3]:
         scale = opset.log(scale) / opset.log(opset.constant(2.0, ov.Type.f32))
         scale = opset.ceil(scale)
         scale = opset.clamp(scale, -127.0, 127.0)
         scale = opset.power(opset.constant(2.0, ov.Type.f32), scale)

     compressed_weight = divide_op(weight, scale)
-    target_dtype = ov.Type.nf4 if config.mode == CompressWeightsMode.NF4 else ov.Type.f4e2m1
-    compressed_weight = convert_op(compressed_weight, target_dtype)
+    compressed_weight = convert_op(compressed_weight, DTYPE_MAP_OV[config.compression_dtype])
     compressed_weight = convert_op(compressed_weight, DTYPE_MAP_OV[compressed_weight_dtype])

     ov_results = [compressed_weight]
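
The MX branch above rounds the per-group scale up to a power of two so it can be stored as an e8m0 scale. A minimal numpy sketch of the same arithmetic outside the OpenVINO graph (the group-wise absmax scale and the 448.0 maximum for f8e4m3 are assumptions on my part; in the model the scale arrives already computed):

import numpy as np

def mx_e8m0_scale(weight_group: np.ndarray, fp_max: float = 448.0) -> np.ndarray:
    # absmax scale per group, clamped away from zero as in the graph above
    scale = np.abs(weight_group).max(axis=-1, keepdims=True)
    eps = np.finfo(np.float32).eps
    scale = np.where(np.abs(scale) < eps, eps, scale)
    scale = scale / fp_max                 # normalize by the target format's maximum value
    scale = np.ceil(np.log2(scale))        # round the scale up to a power of two
    scale = np.clip(scale, -127.0, 127.0)  # keep the exponent within e8m0 range
    return (2.0 ** scale).astype(np.float32)

w = np.random.rand(4, 32).astype(np.float32) - 0.5
print(mx_e8m0_scale(w))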

src/nncf/quantization/algorithms/weight_compression/config.py

Lines changed: 36 additions & 1 deletion
@@ -46,7 +46,17 @@ def num_bits(self):
         """
         :return: number of bits that is used for storing a single quantized value in the given mode.
         """
-        return 8 if self.mode in [CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM] else 4
+        return (
+            8
+            if self.mode
+            in [
+                CompressWeightsMode.INT8_SYM,
+                CompressWeightsMode.INT8_ASYM,
+                CompressWeightsMode.FP8_E4M3,
+                CompressWeightsMode.MXFP8_E4M3,
+            ]
+            else 4
+        )

     @property
     def is_asym_mode(self):
@@ -74,6 +84,31 @@ def is_codebook(self):
         """
         return self.mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]

+    @property
+    def compression_dtype(self) -> TensorDataType:
+        """
+        :return: data type that is used to store compressed weights.
+        """
+        if self.is_codebook:
+            n_quants = self.codebook_values.size
+            if n_quants <= 16:
+                return TensorDataType.uint4
+            if n_quants <= 256:
+                return TensorDataType.uint8
+            return TensorDataType.uint16
+        dtype_per_mode = {
+            CompressWeightsMode.INT4_SYM: TensorDataType.int4,
+            CompressWeightsMode.INT4_ASYM: TensorDataType.uint4,
+            CompressWeightsMode.INT8_ASYM: TensorDataType.uint8,
+            CompressWeightsMode.INT8_SYM: TensorDataType.int8,
+            CompressWeightsMode.NF4: TensorDataType.nf4,
+            CompressWeightsMode.FP4: TensorDataType.f4e2m1,
+            CompressWeightsMode.MXFP4: TensorDataType.f4e2m1,
+            CompressWeightsMode.FP8_E4M3: TensorDataType.f8e4m3,
+            CompressWeightsMode.MXFP8_E4M3: TensorDataType.f8e4m3,
+        }
+        return dtype_per_mode[self.mode]
+
     def get_numpy_codebook(self):
         return self.codebook_values.as_numpy_tensor()
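
A short usage sketch for the new property (a minimal example, assuming the config dataclass can be constructed directly with a mode and group size):

from nncf import CompressWeightsMode
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.tensor import TensorDataType

cfg = WeightCompressionConfig(mode=CompressWeightsMode.MXFP8_E4M3, group_size=32)
assert cfg.compression_dtype == TensorDataType.f8e4m3  # stored as f8e4m3 values
assert cfg.num_bits == 8                               # FP8 modes now report 8 bits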

src/nncf/quantization/algorithms/weight_compression/constants.py

Lines changed: 9 additions & 0 deletions
@@ -11,6 +11,8 @@

 import numpy as np

+from nncf.tensor import TensorDataType
+
 NF4_QUANTILES = np.array(
     [
         -1.0,
@@ -101,3 +103,10 @@


 CENTER_OF_F4E2M1_QUANTILES = (F4E2M1_QUANTILES[1:] + F4E2M1_QUANTILES[:-1]) / 2
+
+
+FP_MAX_VALUES = {
+    TensorDataType.nf4: 1.0,
+    TensorDataType.f4e2m1: 6.0,
+    TensorDataType.f8e4m3: 448.0,
+}
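
These constants are the largest finite magnitudes of the target formats (the NF4 quantiles are already normalized to ±1.0). A quick check of the two float maxima from the formats' exponent/mantissa layouts:

# f4e2m1: max exponent 2 (bias 1), 1 mantissa bit  -> 1.1b  * 2^2
assert 1.5 * 2**2 == 6.0
# f8e4m3: max exponent 8 (bias 7), 3 mantissa bits -> 1.110b * 2^8 (S.1111.111 is reserved for NaN)
assert 1.75 * 2**8 == 448.0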
Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+
+F8E4M3_LUT = np.array(
+    [
+        0.0, 0.001953125, 0.00390625, 0.005859375, 0.0078125, 0.009765625, 0.01171875, 0.013671875,
+        0.015625, 0.017578125, 0.01953125, 0.021484375, 0.0234375, 0.025390625, 0.02734375, 0.029296875,
+        0.03125, 0.03515625, 0.0390625, 0.04296875, 0.046875, 0.05078125, 0.0546875, 0.05859375,
+        0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875,
+        0.125, 0.140625, 0.15625, 0.171875, 0.1875, 0.203125, 0.21875, 0.234375,
+        0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875,
+        0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375,
+        1.0, 1.125, 1.25, 1.375, 1.5, 1.625, 1.75, 1.875,
+        2.0, 2.25, 2.5, 2.75, 3.0, 3.25, 3.5, 3.75,
+        4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5,
+        8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+        16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0,
+        32.0, 36.0, 40.0, 44.0, 48.0, 52.0, 56.0, 60.0,
+        64.0, 72.0, 80.0, 88.0, 96.0, 104.0, 112.0, 120.0,
+        128.0, 144.0, 160.0, 176.0, 192.0, 208.0, 224.0, 240.0,
+        256.0, 288.0, 320.0, 352.0, 384.0, 416.0, 448.0, np.nan,
+    ],
+    dtype=np.float32,
+)
+
+
+def _f16_to_f8e4m3_bits_scalar(h_bits: int) -> int:
+    """Exact port of ov::f16_to_f8e4m3_bits for a single float16 bit-pattern."""
+    # f16 layout
+    f16_s_mask = 0x8000
+    f16_e_mask = 0x7C00
+    f16_e_bias = 15
+    f16_e_size = 5
+    f16_m_mask = 0x03FF
+    f16_m_size = 10
+
+    # f8 e4m3 layout
+    f8e4m3_s_mask = 0x80
+    f8e4m3_e_size = 4
+    f8e4m3_e_mask = 0x78
+    f8e4m3_e_bias = 7
+    f8e4m3_e_max = 0x0F
+    f8e4m3_m_size = 3
+    f8e4m3_m_mask = 0x07
+
+    byte_shift = 8
+
+    # f8 masks in uint16 domain
+    f8_e_mask = f8e4m3_e_mask << byte_shift  # 0x7800
+    f8_m_mask = f8e4m3_m_mask << byte_shift  # 0x0700
+    f8_m_hidden_one_mask = 0x0800  # hidden 1 for subnormals
+
+    # rounding constants (same as C++)
+    round_half = 0x01FF
+    round_norm = 0x007F
+    round_even = 0x0080
+    round_odd = 0x0180
+
+    # min exponent for which subnormals are representable
+    f8_e_subnormal_min = -10
+
+    inp = int(h_bits) & 0xFFFF
+
+    # sign bit: f16 sign -> f8 sign position (bit 15 -> bit 7)
+    f8_bits = (inp & f16_s_mask) >> byte_shift
+
+    f16_e_field = inp & f16_e_mask
+
+    if f16_e_field == f16_e_mask:
+        # f16 NaN / Inf -> f8 NaN (no Inf)
+        f8_bits |= (f8e4m3_e_mask | f8e4m3_m_mask)
+    elif f16_e_field != 0:
+        # normalized f16
+        f8_biased_exp = (f16_e_field >> f16_m_size) - (f16_e_bias - f8e4m3_e_bias)
+        # *** IMPORTANT FIX: shift by (f16_e_size - f8e4m3_e_size) = 5 - 4 = 1 ***
+        fractional = (inp & f16_m_mask) << (f16_e_size - f8e4m3_e_size)
+
+        # normalized f8 part (exp >= 0)
+        if f8_biased_exp >= 0:
+            if (fractional & round_half) == round_odd or (fractional & round_norm) != 0:
+                fractional += round_even
+            if (fractional & f8_e_mask) != 0:
+                f8_biased_exp += 1
+            fractional &= f8_m_mask
+
+        # now set exponent & mantissa
+        if f8_biased_exp > f8e4m3_e_max:
+            # overflow -> NaN (no Inf)
+            f8_bits |= (f8e4m3_e_mask | f8e4m3_m_mask)
+        elif f8_biased_exp > 0:
+            # normalized f8
+            exp_field = (f8_biased_exp & (f8e4m3_e_mask >> f8e4m3_m_size)) << f8e4m3_m_size
+            f8_bits |= exp_field
+            f8_bits |= (fractional >> byte_shift)
+        else:
+            # subnormal f8
+            fractional = f8_m_hidden_one_mask | ((inp & f16_m_mask) << (f16_e_size - f8e4m3_e_size))
+            f8_exp = f8_biased_exp - f8e4m3_e_bias
+            shift = 1 - f8_exp
+            sticky_mask = 0 if f8_exp < f8_e_subnormal_min else ((1 << shift) - 1)
+            sticky = 1 if (fractional & sticky_mask) != 0 else 0
+
+            fractional = 0 if f8_exp < f8_e_subnormal_min else (fractional >> (1 - f8_biased_exp))
+
+            if (((fractional & round_half) == round_odd and sticky == 0) or
+                    (fractional & round_norm) != 0 or sticky != 0):
+                fractional += round_even
+
+            f8_bits |= (fractional >> byte_shift)
+    else:
+        # f16 zero / subnormal -> sign + zero exponent/mantissa
+        # (f8_bits already contains the sign)
+        pass
+
+    return f8_bits & 0xFF
+
+
+_f16_to_f8e4m3_bits_vec = np.vectorize(_f16_to_f8e4m3_bits_scalar, otypes=[np.uint8])
+
+
+def fp32_to_fp8e4m3_values(x: np.ndarray) -> np.ndarray:
+    """
+    Bit-exact to ov::float8_e4m3(float):
+    float32 -> float16 -> f8e4m3 bits -> float via LUT
+    """
+    x = np.asarray(x, dtype=np.float32)
+    x_f16 = x.astype(np.float16)
+    h_bits = x_f16.view(np.uint16)
+
+    f8_bits = _f16_to_f8e4m3_bits_vec(h_bits)
+
+    # Decode exactly like C++: LUT for magnitude + sign bit
+    idx = f8_bits & 0x7F
+    mag = F8E4M3_LUT[idx.astype(np.int32)]
+
+    sign = np.where((f8_bits & 0x80) != 0, -1.0, 1.0)
+    out = sign * mag
+    return out.astype(np.float32)
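
A small usage sketch for the helper above, run alongside the module's definitions (the commented results are what I would expect from the non-saturating OpenVINO conversion; treat them as illustrative, not a golden reference):

import numpy as np

x = np.array([0.1, 1.3, 300.0, 500.0, -0.07], dtype=np.float32)
y = fp32_to_fp8e4m3_values(x)
# 0.1 snaps to the nearest representable value (around 0.1015625);
# 500.0 exceeds the 448.0 maximum and becomes NaN, since f8e4m3 has no Inf.
print(y)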

src/nncf/quantization/algorithms/weight_compression/openvino_backend.py

Lines changed: 5 additions & 26 deletions
@@ -64,6 +64,7 @@
 from nncf.quantization.algorithms.weight_compression.weight_lowering import compress_weight
 from nncf.tensor import Tensor
 from nncf.tensor.definitions import TensorDataType
+from nncf.tensor.functions.openvino_numeric import DTYPE_MAP
 from nncf.tensor.functions.openvino_numeric import DTYPE_MAP_REV


@@ -223,32 +224,11 @@ def _create_compression_subgraph(
         should_add_convert_node: bool,
         precomputed_compressed_weight: Optional[CompressedWeight] = None,
     ):
-        scale_dtype = ov.Type.f16
-        if compression_config.mode == CompressWeightsMode.NF4:
-            compression_dtype = ov.Type.nf4
-        elif compression_config.mode == CompressWeightsMode.MXFP4:
-            compression_dtype = ov.Type.f4e2m1
+        compression_dtype = DTYPE_MAP[compression_config.compression_dtype]
+        if compression_config.mode in [CompressWeightsMode.MXFP4, CompressWeightsMode.MXFP8_E4M3]:
             scale_dtype = ov.Type.f8e8m0
-        elif compression_config.mode == CompressWeightsMode.MXFP8_E4M3:
-            compression_dtype = ov.Type.f8e4m3
-            scale_dtype = ov.Type.f8e8m0
-        elif compression_config.mode == CompressWeightsMode.FP8_E4M3:
-            compression_dtype = ov.Type.f8e4m3
-        elif compression_config.mode == CompressWeightsMode.FP4:
-            compression_dtype = ov.Type.f4e2m1
-        elif compression_config.mode == CompressWeightsMode.INT4_SYM:
-            compression_dtype = ov.Type.i4
-        elif compression_config.mode == CompressWeightsMode.INT4_ASYM:
-            compression_dtype = ov.Type.u4
-        elif compression_config.mode == CompressWeightsMode.INT8_SYM:
-            compression_dtype = ov.Type.i8
-        elif compression_config.mode == CompressWeightsMode.INT8_ASYM:
-            compression_dtype = ov.Type.u8
-        elif compression_config.is_codebook:
-            compression_dtype = None
         else:
-            msg = f"{compression_config.mode.value} is not supported."
-            raise nncf.ParameterNotSupportedError(msg)
+            scale_dtype = ov.Type.f16

         original_shape = weight.shape

@@ -261,8 +241,7 @@ def _create_compression_subgraph(
         )

         if compression_config.is_codebook:
-            n_quants = compressed_weight.codebook.size - 1
-            compression_dtype = ov.Type.u16 if n_quants > 255 else (ov.Type.u8 if n_quants > 15 else ov.Type.u4)
+            compression_dtype = DTYPE_MAP[compression_config.compression_dtype]
             converted_const = create_ov_codebook_subgraph(
                 codebook=compressed_weight.codebook
                 if compression_config.mode == CompressWeightsMode.CODEBOOK
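
The codebook branch now takes its container dtype from compression_config.compression_dtype. A quick equivalence check I wrote (not part of the commit) showing that the size-based thresholds in the new property pick the same container type as the removed n_quants = size - 1 logic:

def old_rule(codebook_size: int) -> str:
    n_quants = codebook_size - 1
    return "u16" if n_quants > 255 else ("u8" if n_quants > 15 else "u4")

def new_rule(codebook_size: int) -> str:
    return "u4" if codebook_size <= 16 else ("u8" if codebook_size <= 256 else "u16")

assert all(old_rule(s) == new_rule(s) for s in range(1, 2049))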
