Commit bd421c7: Fix tests
Parent: b7d8a7b

5 files changed, +67 -29 lines


src/nncf/quantization/algorithms/weight_compression/fp8_conversion.py

Lines changed: 30 additions & 27 deletions

@@ -11,28 +11,29 @@

 import numpy as np

-
+# fmt: off
 F8E4M3_LUT = np.array(
     [
-        0.0, 0.001953125, 0.00390625, 0.005859375, 0.0078125, 0.009765625, 0.01171875, 0.013671875,
-        0.015625, 0.017578125, 0.01953125, 0.021484375, 0.0234375, 0.025390625, 0.02734375, 0.029296875,
-        0.03125, 0.03515625, 0.0390625, 0.04296875, 0.046875, 0.05078125, 0.0546875, 0.05859375,
-        0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875,
-        0.125, 0.140625, 0.15625, 0.171875, 0.1875, 0.203125, 0.21875, 0.234375,
-        0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875,
-        0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375,
-        1.0, 1.125, 1.25, 1.375, 1.5, 1.625, 1.75, 1.875,
-        2.0, 2.25, 2.5, 2.75, 3.0, 3.25, 3.5, 3.75,
-        4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5,
-        8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
-        16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0,
-        32.0, 36.0, 40.0, 44.0, 48.0, 52.0, 56.0, 60.0,
-        64.0, 72.0, 80.0, 88.0, 96.0, 104.0, 112.0, 120.0,
-        128.0, 144.0, 160.0, 176.0, 192.0, 208.0, 224.0, 240.0,
-        256.0, 288.0, 320.0, 352.0, 384.0, 416.0, 448.0, np.nan,
+        0.0, 0.001953125, 0.00390625, 0.005859375, 0.0078125, 0.009765625, 0.01171875, 0.013671875,  # noqa
+        0.015625, 0.017578125, 0.01953125, 0.021484375, 0.0234375, 0.025390625, 0.02734375, 0.029296875,  # noqa
+        0.03125, 0.03515625, 0.0390625, 0.04296875, 0.046875, 0.05078125, 0.0546875, 0.05859375,  # noqa
+        0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875,  # noqa
+        0.125, 0.140625, 0.15625, 0.171875, 0.1875, 0.203125, 0.21875, 0.234375,  # noqa
+        0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875,  # noqa
+        0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375,  # noqa
+        1.0, 1.125, 1.25, 1.375, 1.5, 1.625, 1.75, 1.875,  # noqa
+        2.0, 2.25, 2.5, 2.75, 3.0, 3.25, 3.5, 3.75,  # noqa
+        4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5,  # noqa
+        8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,  # noqa
+        16.0, 18.0, 20.0, 22.0, 24.0, 26.0, 28.0, 30.0,  # noqa
+        32.0, 36.0, 40.0, 44.0, 48.0, 52.0, 56.0, 60.0,  # noqa
+        64.0, 72.0, 80.0, 88.0, 96.0, 104.0, 112.0, 120.0,  # noqa
+        128.0, 144.0, 160.0, 176.0, 192.0, 208.0, 224.0, 240.0,  # noqa
+        256.0, 288.0, 320.0, 352.0, 384.0, 416.0, 448.0, np.nan,  # noqa
     ],
     dtype=np.float32,
 )
+# fmt: on


 def _f16_to_f8e4m3_bits_scalar(h_bits: int) -> int:

@@ -46,7 +47,6 @@ def _f16_to_f8e4m3_bits_scalar(h_bits: int) -> int:
     f16_m_size = 10

     # f8 e4m3 layout
-    f8e4m3_s_mask = 0x80
     f8e4m3_e_size = 4
     f8e4m3_e_mask = 0x78
     f8e4m3_e_bias = 7

@@ -57,9 +57,9 @@ def _f16_to_f8e4m3_bits_scalar(h_bits: int) -> int:
     byte_shift = 8

     # f8 masks in uint16 domain
-    f8_e_mask = f8e4m3_e_mask << byte_shift # 0x7800
-    f8_m_mask = f8e4m3_m_mask << byte_shift # 0x0700
-    f8_m_hidden_one_mask = 0x0800 # hidden 1 for subnormals
+    f8_e_mask = f8e4m3_e_mask << byte_shift  # 0x7800
+    f8_m_mask = f8e4m3_m_mask << byte_shift  # 0x0700
+    f8_m_hidden_one_mask = 0x0800  # hidden 1 for subnormals

     # rounding constants (same as C++)
     round_half = 0x01FF

@@ -79,7 +79,7 @@ def _f16_to_f8e4m3_bits_scalar(h_bits: int) -> int:

     if f16_e_field == f16_e_mask:
         # f16 NaN / Inf -> f8 NaN (no Inf)
-        f8_bits |= (f8e4m3_e_mask | f8e4m3_m_mask)
+        f8_bits |= f8e4m3_e_mask | f8e4m3_m_mask
     elif f16_e_field != 0:
         # normalized f16
         f8_biased_exp = (f16_e_field >> f16_m_size) - (f16_e_bias - f8e4m3_e_bias)

@@ -97,12 +97,12 @@ def _f16_to_f8e4m3_bits_scalar(h_bits: int) -> int:
         # now set exponent & mantissa
         if f8_biased_exp > f8e4m3_e_max:
             # overflow -> NaN (no Inf)
-            f8_bits |= (f8e4m3_e_mask | f8e4m3_m_mask)
+            f8_bits |= f8e4m3_e_mask | f8e4m3_m_mask
         elif f8_biased_exp > 0:
             # normalized f8
             exp_field = (f8_biased_exp & (f8e4m3_e_mask >> f8e4m3_m_size)) << f8e4m3_m_size
             f8_bits |= exp_field
-            f8_bits |= (fractional >> byte_shift)
+            f8_bits |= fractional >> byte_shift
         else:
             # subnormal f8
             fractional = f8_m_hidden_one_mask | ((inp & f16_m_mask) << (f16_e_size - f8e4m3_e_size))

@@ -113,11 +113,14 @@ def _f16_to_f8e4m3_bits_scalar(h_bits: int) -> int:

             fractional = 0 if f8_exp < f8_e_subnormal_min else (fractional >> (1 - f8_biased_exp))

-            if (((fractional & round_half) == round_odd and sticky == 0) or
-                    (fractional & round_norm) != 0 or sticky != 0):
+            if (
+                ((fractional & round_half) == round_odd and sticky == 0)
+                or (fractional & round_norm) != 0
+                or sticky != 0
+            ):
                 fractional += round_even

-            f8_bits |= (fractional >> byte_shift)
+            f8_bits |= fractional >> byte_shift
     else:
         # f16 zero / subnormal -> sign + zero exponent/mantissa
         # (f8_bits already contains the sign)
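Note (context, not part of the commit): F8E4M3_LUT holds exactly the 128 non-negative e4m3 code points in code order. A minimal sketch that regenerates the table, assuming the standard e4m3 layout of 4 exponent bits with bias 7 and 3 mantissa bits, where the all-ones code is the single NaN and there is no Inf:

import numpy as np

def e4m3_value(code: int) -> float:
    """Decode a 7-bit non-negative f8e4m3 code (sign bit dropped)."""
    e = (code >> 3) & 0xF  # 4-bit exponent field
    m = code & 0x7         # 3-bit mantissa field
    if e == 0xF and m == 0x7:
        return float("nan")                   # single NaN code; e4m3 has no Inf
    if e == 0:
        return (m / 8.0) * 2.0**-6            # subnormal: no hidden one
    return (1.0 + m / 8.0) * 2.0 ** (e - 7)   # normal: hidden one, bias 7

lut = np.array([e4m3_value(c) for c in range(128)], dtype=np.float32)
# lut[1] == 0.001953125 (smallest subnormal), lut[126] == 448.0 (max finite),
# and lut[127] is NaN, matching the last row of F8E4M3_LUT above.

This also explains why the converter writes f8e4m3_e_mask | f8e4m3_m_mask (the all-ones pattern) for f16 Inf/NaN and for overflow: with no Inf in e4m3, NaN is the only escape value.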

src/nncf/tensor/functions/numpy_numeric.py

Lines changed: 1 addition & 0 deletions

@@ -37,6 +37,7 @@
     TensorDataType.int32: np.dtype(np.int32),
     TensorDataType.int64: np.dtype(np.int64),
     TensorDataType.uint8: np.dtype(np.uint8),
+    TensorDataType.uint16: np.dtype(np.uint16),
 }

 DTYPE_MAP_REV = {v: k for k, v in DTYPE_MAP.items()}

src/nncf/tensor/functions/torch_numeric.py

Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@
     TensorDataType.int32: torch.int32,
     TensorDataType.int64: torch.int64,
     TensorDataType.uint8: torch.uint8,
+    TensorDataType.uint16: torch.uint16,
 }

 DEVICE_MAP = {TensorDeviceType.CPU: "cpu", TensorDeviceType.GPU: "cuda"}
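A note on why TensorDataType.uint16 is added to both backends' dtype maps (my reading, not stated in the commit): the f16-to-f8 conversion above operates on raw float16 bit patterns, and uint16 is the natural carrier type for those 16-bit codes. A small NumPy illustration:

import numpy as np

f16 = np.array([0.1, 1.5, -448.0], dtype=np.float16)
bits = f16.view(np.uint16)  # reinterpret the same bytes as uint16 codes
# Each element of `bits` is the IEEE half-precision encoding, e.g.
# bits[1] == 0x3E00 for 1.5; values like these are the `h_bits`
# consumed by _f16_to_f8e4m3_bits_scalar above.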

tests/cross_fw/test_templates/template_test_nncf_tensor.py

Lines changed: 1 addition & 0 deletions

@@ -2168,6 +2168,7 @@ def test_fn_eye(self, n, m, ref):
         in [
             TensorDataType.int4,
             TensorDataType.uint4,
+            TensorDataType.uint16,
             TensorDataType.nf4,
             TensorDataType.f4e2m1,
             TensorDataType.f8e8m0,

tests/openvino/native/quantization/test_weights_compression.py

Lines changed: 34 additions & 2 deletions

@@ -1441,6 +1441,38 @@ def test_int_compressed_weighs_range(mode, data):
             8.0,
         ],
     },
+    CompressWeightsMode.FP8_E4M3: {
+        "neg": [
+            -8.0,
+            -6.857143402099609,
+            -5.714285850524902,
+            -5.142857551574707,
+            -4.0,
+            -2.857142925262451,
+            -2.0,
+            -1.0,
+            0.0,
+        ],
+        "pos": [0.0, 1.0, 2.0, 2.857142925262451, 4.0, 5.142857551574707, 5.714285850524902, 6.857143402099609, 8.0],
+        "neg-pos": [
+            -8.0,
+            -6.857143402099609,
+            -5.714285850524902,
+            -5.142857551574707,
+            -4.0,
+            -2.857142925262451,
+            -2.0,
+            -1.0,
+            0.0,
+            1.0,
+            2.0,
+            2.857142925262451,
+            4.0,
+            5.142857551574707,
+            5.714285850524902,
+            6.857143402099609,
+        ],
+    },
 }


@@ -1999,7 +2031,7 @@ def test_nf4_quantization_mid_quant(weight, scale):
     scale = Tensor(scale)
     # norm_weight equals -0.8480964 (one bit away from the first NF4 quantile center)
     norm_weight = _calculate_normalized_weight(weight, scale)
-    nf4_quant = _calculate_float_quantized_weight(norm_weight, CompressWeightsMode.NF4)
+    nf4_quant = _calculate_float_quantized_weight(norm_weight, TensorDataType.nf4)

     norm_weight_ov_backend = Tensor(ov.Tensor(norm_weight.data, norm_weight.shape, ov.Type.f32))
     ref_nf4_quant = norm_weight_ov_backend.astype(TensorDataType.nf4).as_numpy_tensor()

@@ -2027,7 +2059,7 @@ def test_nf4_quantization_mid_quant(weight, scale):
 )
 def test_mxfp4_quantization_edge_cases(input_val, expected_val, description):
     norm_weight = Tensor(np.array([input_val], dtype=np.float32))
-    result = _calculate_float_quantized_weight(norm_weight, CompressWeightsMode.MXFP4)
+    result = _calculate_float_quantized_weight(norm_weight, TensorDataType.f4e2m1)

     assert result.data[0] == expected_val, (
         f"{description}: Expected {expected_val}, got {result.data[0]} for input value {input_val}"
