Skip to content

Commit 57f4fca

Browse files
models with transpose_a failed with an error for unsupported algorithms
1 parent 64f40e0 commit 57f4fca

File tree

10 files changed

+85
-43
lines changed

10 files changed

+85
-43
lines changed

src/nncf/quantization/algorithms/weight_compression/algorithm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,12 @@ def apply_with_parameters(
10871087
)
10881088

10891089
if self._lora_correction:
1090+
for wc_params in all_weight_params:
1091+
act_port_id = self._backend_entity.get_activation_port_id(wc_params.node_with_weight, graph)
1092+
if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, act_port_id):
1093+
msg = "Transposed activations are not supported yet for the LoRa correction algorithm"
1094+
raise nncf.UnsupportedModelError(msg)
1095+
10901096
lora_correction_params = self._advanced_parameters.lora_correction_params
10911097
lora_correction_algo = LoraCorrectionAlgorithm(statistics, lora_correction_params)
10921098
description += " with correction of low-rank adapters"

src/nncf/quantization/algorithms/weight_compression/backend.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,17 @@ def get_weight(self, node_with_weight: NNCFNode, weight_port_id: int, model: TMo
111111
:return: The weight tensor.
112112
"""
113113

114+
@abstractmethod
115+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
116+
"""
117+
Checks whether the activation input of a MatMul operation is transposed.
118+
119+
:param matmul: MatMul NNCFGraph node.
120+
:param act_port_id: Index of the input port corresponding to the activation tensor.
121+
:return: True if the node is a matmul node and activation input is transposed,
122+
False otherwise.
123+
"""
124+
114125
@abstractmethod
115126
def get_weight_dtype(
116127
self, node_with_weight: NNCFNode, weight_port_id: int, model: TModel, graph: NNCFGraph
@@ -279,6 +290,7 @@ def get_ignored_patterns() -> GraphPattern:
279290
def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: tuple[int]) -> int:
280291
"""
281292
Returns axis number of the activation tensor which correspond to it channel.
293+
282294
:param node: NNCFNode instance.
283295
:param port_id: Port ID for input.
284296
:param input_shape: Shape of the input.

src/nncf/quantization/algorithms/weight_compression/gptq.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,12 @@ def apply(
124124
CompressWeightsMode.INT8_SYM,
125125
]:
126126
continue
127+
128+
act_port_id = self._backend_entity.get_activation_port_id(wc_params.node_with_weight, graph)
129+
if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, act_port_id):
130+
msg = "Transposed activations are not supported yet for the GPTQ algorithm"
131+
raise nncf.UnsupportedModelError(msg)
132+
127133
_, input_tensors = next(iter(inputs.items()))
128134
hessian = self._calculate_hessian(node, input_tensors)
129135
scale, zero_point = self._quantize_weights(model, graph, wc_params, hessian, input_tensors)

src/nncf/quantization/algorithms/weight_compression/onnx_backend.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,12 @@ def get_weight(
187187
weight_tensor = get_tensor_value(model, weight_name)
188188
return Tensor(weight_tensor)
189189

190+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
191+
if matmul.metatype != metatypes.ONNXGemmMetatype:
192+
return False
193+
trans_attr = "transB" if act_port_id else "transA"
194+
return matmul.layer_attributes.node_attrs[trans_attr]
195+
190196
def get_weight_dtype(
191197
self, node_with_weight: NNCFNode, weight_port_id: int, model: onnx.ModelProto, graph: NNCFGraph
192198
) -> TensorDataType:

src/nncf/quantization/algorithms/weight_compression/openvino_backend.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,16 @@ def get_weight_names_and_port_ids(node: NNCFNode, graph: NNCFGraph) -> list[tupl
135135
return result
136136

137137
def get_weight(self, node_with_weight: NNCFNode, weight_port_id: int, model: ov.Model, graph: NNCFGraph) -> Tensor:
138-
if not node_with_weight.layer_attributes.constant_attributes[weight_port_id]["transpose"]:
139-
msg = "Only transposed weights are supported"
140-
raise nncf.UnsupportedModelError(msg)
141138
weight_name = node_with_weight.layer_attributes.constant_attributes[weight_port_id]["name"]
142139
weight_node = self.name_to_node_mapping[weight_name]
143140
weight_tensor = get_const_value_as_numpy_tensor(weight_node)
144141
return Tensor(weight_tensor)
145142

143+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
144+
if matmul.metatype != om.OVMatMulMetatype:
145+
return False
146+
return matmul.layer_attributes.input_attributes["transpose"]
147+
146148
def get_weight_dtype(
147149
self, node_with_weight: NNCFNode, weight_port_id: int, model: ov.Model, graph: NNCFGraph
148150
) -> TensorDataType:
@@ -330,15 +332,6 @@ def transform_model(
330332
compression_format: CompressionFormat = CompressionFormat.DQ,
331333
advanced_parameters: Optional[AdvancedCompressionParameters] = None,
332334
) -> ov.Model:
333-
for wc_params in weight_compression_parameters:
334-
if (
335-
lora_correction_algo is not None
336-
and lora_correction_algo.is_applicable(wc_params)
337-
and wc_params.node_with_weight.layer_attributes.input_attributes["transpose"]
338-
):
339-
msg = "Transposed input for the LoRa correction is not supported"
340-
raise nncf.UnsupportedModelError(msg)
341-
342335
for wc_params in weight_compression_parameters:
343336
const_attributes = wc_params.node_with_weight.layer_attributes.constant_attributes[wc_params.weight_port_id]
344337
const_node_name = const_attributes["name"]

src/nncf/quantization/algorithms/weight_compression/scale_estimation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ def apply(
139139
continue
140140
_, weight_port_id = weight_data[0]
141141

142+
act_port_id = self._backend_entity.get_activation_port_id(wp.node_with_weight, graph)
143+
if self._backend_entity.matmul_has_transposed_activations(wp.node_with_weight, act_port_id):
144+
msg = "Transposed activations are not supported yet for the Scale Estimation algorithm"
145+
raise nncf.UnsupportedModelError(msg)
142146
weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph)
143147

144148
scale, zero_point = self.calculate_quantization_params(

src/nncf/quantization/algorithms/weight_compression/torch_backend.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ def get_weight(
177177
raise nncf.InternalError(msg)
178178
return Tensor(weight)
179179

180+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
181+
return False
182+
180183
def get_weight_dtype(
181184
self,
182185
node_with_weight: NNCFNode,

src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ def get_weight(
128128

129129
return Tensor(weight)
130130

131+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
132+
return False
133+
131134
def get_weight_dtype(
132135
self, node_with_weight: NNCFNode, weight_port_id: int, model: torch.fx.GraphModule, graph: NNCFGraph
133136
) -> TensorDataType:

tests/cross_fw/test_templates/template_test_weights_compression.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from nncf.quantization import compress_weights
3232
from nncf.quantization.advanced_parameters import AdvancedAWQParameters as AWQParams
3333
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters as CompressionParams
34+
from nncf.quantization.advanced_parameters import AdvancedGPTQParameters as GPTQParams
3435
from nncf.quantization.algorithms.weight_compression.activation_stats import WCTensorStatistic
3536
from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats
3637
from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression
@@ -779,3 +780,42 @@ def test_process_stats(self, case: ProcessStatsTestCase):
779780
@abstractmethod
780781
def get_transposable_awq_model(transpose_a: bool, transpose_b: bool, input_shape=None) -> TModel:
781782
"Returns a backend model for test_compression_with_transpose."
783+
784+
@pytest.mark.parametrize(
785+
"kwargs",
786+
[
787+
dict(scale_estimation=True),
788+
dict(lora_correction=True),
789+
dict(
790+
gptq=True,
791+
advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
792+
),
793+
],
794+
)
795+
def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs):
796+
if not transpose_a_supported:
797+
pytest.skip("transpose_a is not supported for the current backend")
798+
if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms():
799+
pytest.skip("Scale estimation is not supported")
800+
if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms():
801+
pytest.skip("GPTQ is not supported")
802+
if kwargs.get("lora_correction", False) and "lora_correction" in self.get_not_supported_algorithms():
803+
pytest.skip("lora_correction is not supported")
804+
805+
INPUT_SHAPE = (2, 4)
806+
model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
807+
input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
808+
input = self.to_tensor(input)
809+
dataset = Dataset([input] * 2, self.get_transform_func())
810+
811+
with pytest.raises(nncf.UnsupportedModelError):
812+
compress_weights(
813+
model,
814+
mode=CompressWeightsMode.INT4_SYM,
815+
ratio=1.0,
816+
group_size=1,
817+
subset_size=2,
818+
dataset=dataset,
819+
all_layers=True,
820+
**kwargs,
821+
)

tests/openvino/native/quantization/test_weights_compression.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1943,37 +1943,6 @@ def test_compression_with_different_algo_combinations(input_shape, kwargs):
19431943
)
19441944

19451945

1946-
@pytest.mark.parametrize(
1947-
"kwargs",
1948-
[
1949-
dict(scale_estimation=True),
1950-
dict(lora_correction=True),
1951-
dict(
1952-
gptq=True,
1953-
scale_estimation=True,
1954-
advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
1955-
),
1956-
],
1957-
)
1958-
def test_compression_with_transposed_activations(kwargs):
1959-
dataset_size = 4
1960-
model = LMLinearModel(transpose_a=True, transpose_b=False).ov_model
1961-
input_data = [np.ones(inp.shape) for inp in model.inputs] * dataset_size
1962-
dataset = Dataset(input_data)
1963-
1964-
with pytest.raises(nncf.UnsupportedModelError):
1965-
compress_weights(
1966-
model,
1967-
mode=CompressWeightsMode.INT4_SYM,
1968-
ratio=1.0,
1969-
group_size=8,
1970-
subset_size=2,
1971-
dataset=dataset,
1972-
all_layers=True,
1973-
**kwargs,
1974-
)
1975-
1976-
19771946
@pytest.mark.parametrize("disabled", [False, True])
19781947
def test_disabled_optimized_compression(disabled):
19791948
hidden_dim = (MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION // LMLinearModel.OUTPUT_DIM) + 1

0 commit comments

Comments
 (0)