diff --git a/src/nncf/quantization/algorithms/weight_compression/activation_stats.py b/src/nncf/quantization/algorithms/weight_compression/activation_stats.py index 514aaec43a7..24778e1e952 100644 --- a/src/nncf/quantization/algorithms/weight_compression/activation_stats.py +++ b/src/nncf/quantization/algorithms/weight_compression/activation_stats.py @@ -17,12 +17,13 @@ from nncf.tensor import functions as fns -def process_stats(stats: WCTensorStatistic, subset_size: int) -> tuple[Tensor, Tensor]: +def process_stats(stats: WCTensorStatistic, subset_size: int, act_ch_axis: int = -1) -> tuple[Tensor, Tensor]: """ A function for processing activations. Shared between AWQ, Scale Estimation and LoRA Correction algorithms. :param stats: An object containing statistics for the layer. :param subset_size: The number of samples for AWQ. + :param act_ch_axis: The activation channel axis. :return: tuple of the following tensors: s - maximum channel magnitude across samples [HiddenDim] X - average channel magnitude across tokens in the sequence [HiddenDim, min(SampleSize, ~subset_size)] @@ -41,7 +42,9 @@ def process_stats(stats: WCTensorStatistic, subset_size: int) -> tuple[Tensor, T # Prevent high memory and time consumption by sampling if X_full.shape[sample_axis] > subset_size: - lens = [reduce(mul, shape[:-1], 1) for shape in stats.shape_values] + lens = [ + reduce(mul, shape[:act_ch_axis] + shape[act_ch_axis % len(shape) + 1 :], 1) for shape in stats.shape_values + ] step = X_full.shape[sample_axis] // subset_size idxs = [i[0] for i in sorted(enumerate(lens), key=lambda x: -x[1])][::step] X = X_full[..., idxs] diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py index 7a58055a4d8..aebde028217 100644 --- a/src/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/src/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -959,9 +959,9 @@ def get_weight_compression_parameters( # MoE operations are usually matmuls, so the check for matmul metatype is done # This is to avoid raising the error for non-MoE cases with 3D weights. parsed_ov_version = f"{ov_version[0]}.{ov_version[1]}.{ov_version[2]}-{ov_version[3]}" - msg = f"""NNCF compression algorithms do not support 3D weights with current version of - OpenVINO {parsed_ov_version} due to a known issue in statistics collection - Ticket - 176465. Please update to the latest OpenVINO nightly version. + msg = f"""NNCF compression algorithms do not support 3D weights with current version of + OpenVINO {parsed_ov_version} due to a known issue in statistics collection + Ticket - 176465. Please update to the latest OpenVINO nightly version. 
Node with weight: {node.node_name}.""" raise nncf.UnsupportedModelError(msg) @@ -1087,6 +1087,11 @@ def apply_with_parameters( ) if self._lora_correction: + for wc_params in all_weight_params: + if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, graph): + msg = "Transposed activations are not supported yet for the LoRa correction algorithm" + raise nncf.UnsupportedModelError(msg) + lora_correction_params = self._advanced_parameters.lora_correction_params lora_correction_algo = LoraCorrectionAlgorithm(statistics, lora_correction_params) description += " with correction of low-rank adapters" @@ -1128,19 +1133,21 @@ def apply_with_parameters( ) return transformed_model - def _get_activation_node_and_port(self, node: NNCFNode, nncf_graph: NNCFGraph) -> tuple[NNCFNode, int]: + def _get_activation_node_port_and_channel(self, node: NNCFNode, nncf_graph: NNCFGraph) -> tuple[NNCFNode, int, int]: """ - This method returns the activation layer and corresponding port id for the node. + This method returns the activation layer, corresponding port id and channel axis for the given node. :param node: NNCFGraph node for which the activation is sought. :param nncf_graph: NNCFGraph instance with the node. - :return: Tuple with the activation node and port id. + :return: Tuple with the activation node, port id and channel axis. """ activation_port = self._backend_entity.get_activation_port_id(node, nncf_graph) activation_edge = nncf_graph.get_input_edge_by_port_id(node, activation_port) activation_node = activation_edge.from_node - port_id = activation_edge.output_port_id - return activation_node, port_id + activation_channel_axis = self._backend_entity.get_activation_channel_axis( + node, activation_edge.input_port_id, activation_edge.tensor_shape + ) + return activation_node, activation_edge.output_port_id, activation_channel_axis def get_matmul_input_to_output_nodes_map( self, matmul_nodes: list[NNCFNode], graph: NNCFGraph @@ -1161,8 +1168,8 @@ def get_matmul_input_to_output_nodes_map( """ matmul_input_to_output_nodes_map = defaultdict(list) for node in matmul_nodes: - act_node, output_port_id = self._get_activation_node_and_port(node, graph) - matmul_input_to_output_nodes_map[(act_node, output_port_id)].append(node) + act_node, output_port_id, act_channel_axis = self._get_activation_node_port_and_channel(node, graph) + matmul_input_to_output_nodes_map[(act_node, output_port_id, act_channel_axis)].append(node) return matmul_input_to_output_nodes_map def get_compression_nodes_info( @@ -1230,7 +1237,11 @@ def get_statistic_points( # Statistics for data aware algorithms if self._data_aware_compression: - for (node, output_port_id), node_with_weights in matmul_input_to_output_nodes_map.items(): + for ( + node, + output_port_id, + input_channel_axis, + ), node_with_weights in matmul_input_to_output_nodes_map.items(): statistic_point = self._backend_entity.target_point( TargetType.POST_LAYER_OPERATION, node.node_name, port_id=output_port_id ) @@ -1245,13 +1256,16 @@ def get_statistic_points( ] all_weight_dims.extend(weight_dims) - # by default, reduce activations across all but the last dimension. The last dimension is - # assumed to be the hidden size dimension. + # Reduce activations across all but the hidden dimension. n_dims = len(graph.get_output_edges_by_port_id(node, output_port_id)[0].tensor_shape) - reduction_axes = tuple(range(n_dims - 1)) + # negative axis (e.g. 
-1 for the last axis) is converted into the corresponding positive value
+                input_channel_axis = input_channel_axis % n_dims
+                reduction_axes = tuple(i for i in range(n_dims) if i != input_channel_axis)
-                # For 3D weights, hidden dimension is the second dimension. Reduce by all other dimensions
-                reduction_axes = (1,) if any(weight_dim == 3 for weight_dim in all_weight_dims) else reduction_axes
+                # For 3D weights, keep the batch dimension
+                if any(weight_dim == 3 for weight_dim in all_weight_dims):
+                    assert len(reduction_axes) == 2
+                    reduction_axes = reduction_axes[1:]
                 stat_collector = self._backend_entity.mean_statistic_collector(
                     reduction_axes=reduction_axes, subset_size=self._subset_size
@@ -1291,7 +1305,7 @@ def _get_statistics_for_weights_compression(
         # Where mean_value is a 1D tensor representing an activation reduced over batch and sequence length dimensions,
         # shape is an original shape of an activation before reduction, n is the size of the dataset (or subset_size).
         statistics = {}
-        for (act_node, output_port_id), matmul_nodes in matmul_input_to_output_nodes_map.items():
+        for (act_node, output_port_id, _), matmul_nodes in matmul_input_to_output_nodes_map.items():
             tensor_collectors = list(
                 statistic_points.get_algo_statistics_for_node(
                     act_node.node_name,
diff --git a/src/nncf/quantization/algorithms/weight_compression/awq.py b/src/nncf/quantization/algorithms/weight_compression/awq.py
index fab970fc0dc..411d79ae795 100644
--- a/src/nncf/quantization/algorithms/weight_compression/awq.py
+++ b/src/nncf/quantization/algorithms/weight_compression/awq.py
@@ -170,6 +170,8 @@ def apply(
             weight_dtype = weight.dtype
             weight = weight.astype(TensorDataType.float32)
+            act_ch_axis, act_shape = self._get_activation_channel_axis_and_shape(graph, wp)
+
             if is_data_free:
                 scale = self._data_free_step(weight, 1 - wp.reduction_axes[0])
             else:
@@ -181,10 +183,10 @@ def apply(
                 prev_weight = self._backend_entity.get_weight(merge_node, prev_weight_port_id, model, graph)
                 prev_statistics = statistics[merge_node.node_name]
-                scale = self._data_aware_step(wp, weight, statistics[k], prev_weight, prev_statistics)
+                scale = self._data_aware_step(wp, weight, statistics[k], act_ch_axis, prev_weight, prev_statistics)
             w_scale = fns.unsqueeze(scale, 1 - wp.reduction_axes[0])
-            a_scale = fns.unsqueeze(1.0 / scale, wp.reduction_axes[0])
+            a_scale = 1.0 / scale
             scaled_weight = (weight * w_scale).astype(weight_dtype)
             self._backend_entity.set_weight(wp.node_with_weight, weight_port_id, model, graph, scaled_weight)
@@ -192,13 +194,17 @@ def apply(
             if is_mergeable:  # for MatMul->Multiply->MatMul pattern the scale is merged to the first MatMul
                 for _, port_id in self._backend_entity.get_weight_names_and_port_ids(merge_node, graph):
                     merge_weight = self._backend_entity.get_weight(merge_node, port_id, model, graph)
+                    a_scale = fns.unsqueeze(a_scale, wp.reduction_axes[0])
                     merge_weight = (merge_weight * a_scale).astype(weight_dtype)
                     self._backend_entity.set_weight(merge_node, port_id, model, graph, merge_weight)
-                a_scale = fns.transpose(a_scale)
             else:  # for Act->Multiply->MatMul and Act->MatMul patterns scale inserted after Act as extra node
-                a_scale = fns.transpose(a_scale).astype(weight_dtype)
+                # Calculate the activation scale shape
+                a_scale_shape = [scale.shape[0] if axis == act_ch_axis else 1 for axis in range(len(act_shape))]
+                a_scale = fns.reshape(a_scale, tuple(a_scale_shape))
+
                 next_nodes = graph.get_next_nodes(merge_node)
                 source_node_output_port = graph.get_output_edges(merge_node)[0].output_port_id
+
                scale_insertion_command = self._backend_entity.scale_insertion_command(
                    merge_node, next_nodes, source_node_output_port, a_scale.data
                )
@@ -210,10 +216,10 @@ def apply(
         return transformed_model
-    def _data_aware_step(self, wp, weight, statistics, prev_weight=None, prev_statistics=None):
+    def _data_aware_step(self, wp, weight, statistics, act_ch_axis, prev_weight=None, prev_statistics=None):
         alpha_step = (self._alpha_max - self._alpha_min) / self._steps
         config = wp.compression_config
-        s, X = process_stats(statistics, self._subset_size)
+        s, X = process_stats(statistics, self._subset_size, act_ch_axis)
         s = s.astype(TensorDataType.float32)
         X = X.astype(TensorDataType.float32)
@@ -222,7 +228,7 @@ def _data_aware_step(self, wp, weight, statistics, prev_weight=None, prev_statis
         prev_s, prev_w = None, None
         if prev_statistics is not None and prev_weight is not None:
-            prev_s, _ = process_stats(prev_statistics, self._subset_size)
+            prev_s, _ = process_stats(prev_statistics, self._subset_size, act_ch_axis)
             prev_s = prev_s.astype(TensorDataType.float32).max().item()
             prev_w = fns.mean(fns.abs(prev_weight), axis=reduction_axis)
@@ -311,6 +317,16 @@ def _data_aware_step(self, wp, weight, statistics, prev_weight=None, prev_statis
         return scale
+    def _get_activation_channel_axis_and_shape(
+        self, graph: NNCFGraph, wp: WeightCompressionParameters
+    ) -> tuple[int, tuple[int, ...]]:
+        activation_port_id = self._backend_entity.get_activation_port_id(wp.node_with_weight, graph)
+        act_shape = graph.get_input_edge_by_port_id(wp.node_with_weight, activation_port_id).tensor_shape
+        act_ch_axis = self._backend_entity.get_activation_channel_axis(
+            wp.node_with_weight, activation_port_id, act_shape
+        )
+        return act_ch_axis % len(act_shape), act_shape
+
     @staticmethod
     def _clamp_scale(magnitudes, threshold, scale, clamped_scale):
         return fns.where(magnitudes < threshold, scale, clamped_scale)
diff --git a/src/nncf/quantization/algorithms/weight_compression/backend.py b/src/nncf/quantization/algorithms/weight_compression/backend.py
index 24baed2ae7d..d1ddf8f99dc 100644
--- a/src/nncf/quantization/algorithms/weight_compression/backend.py
+++ b/src/nncf/quantization/algorithms/weight_compression/backend.py
@@ -110,6 +110,17 @@ def get_weight(self, node_with_weight: NNCFNode, weight_port_id: int, model: TMo
         :return: The weight tensor.
         """
+    @abstractmethod
+    def matmul_has_transposed_activations(self, matmul: NNCFNode, graph: NNCFGraph) -> bool:
+        """
+        Checks whether the activation input of a MatMul operation is transposed.
+
+        :param matmul: MatMul NNCFGraph node.
+        :param graph: The model graph associated with the model.
+        :return: True if the node is a MatMul node and its activation input is transposed,
+            False otherwise.
+        """
+
     @abstractmethod
     def get_weight_dtype(
         self, node_with_weight: NNCFNode, weight_port_id: int, model: TModel, graph: NNCFGraph
     ) -> TensorDataType:
@@ -273,6 +284,18 @@ def get_ignored_patterns() -> GraphPattern:
         :return: backend-specific ignored patterns.
         """
+    @staticmethod
+    @abstractmethod
+    def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: tuple[int]) -> int:
+        """
+        Returns the axis number of the activation tensor which corresponds to its channel.
+
+        :param node: NNCFNode instance.
+        :param port_id: Port ID for input.
+        :param input_shape: Shape of the input.
+        :return: Channel axis number.
+ """ + class AWQAlgoBackend(WeightCompressionAlgoBackend): @staticmethod diff --git a/src/nncf/quantization/algorithms/weight_compression/gptq.py b/src/nncf/quantization/algorithms/weight_compression/gptq.py index 343716615cd..aeb32adede1 100644 --- a/src/nncf/quantization/algorithms/weight_compression/gptq.py +++ b/src/nncf/quantization/algorithms/weight_compression/gptq.py @@ -124,6 +124,11 @@ def apply( CompressWeightsMode.INT8_SYM, ]: continue + + if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, graph): + msg = "Transposed activations are not supported yet for the GPTQ algorithm" + raise nncf.UnsupportedModelError(msg) + _, input_tensors = next(iter(inputs.items())) hessian = self._calculate_hessian(node, input_tensors) scale, zero_point = self._quantize_weights(model, graph, wc_params, hessian, input_tensors) diff --git a/src/nncf/quantization/algorithms/weight_compression/mixed_precision.py b/src/nncf/quantization/algorithms/weight_compression/mixed_precision.py index bc1551e00aa..93439f8669c 100644 --- a/src/nncf/quantization/algorithms/weight_compression/mixed_precision.py +++ b/src/nncf/quantization/algorithms/weight_compression/mixed_precision.py @@ -279,7 +279,7 @@ def get_statistic_points( self._set_backend_entity(model) statistic_container = StatisticPointsContainer() - for act_node, output_port_id in nodes_and_port_ids: + for act_node, output_port_id, _ in nodes_and_port_ids: n_dims = len(graph.get_output_edges_by_port_id(act_node, output_port_id)[0].tensor_shape) if n_dims < 2: msg = ( diff --git a/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py b/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py index ed483486e5a..00a4394a14a 100644 --- a/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/onnx_backend.py @@ -38,6 +38,7 @@ from nncf.onnx.graph.model_transformer import remove_initializer from nncf.onnx.graph.model_transformer import remove_node from nncf.onnx.graph.model_transformer import set_initializer +from nncf.onnx.graph.node_utils import get_act_quantization_axis from nncf.onnx.graph.node_utils import get_weight_quantization_axis from nncf.onnx.graph.onnx_helper import ONNX_DTYPE_TO_NNCF_DTYPE from nncf.onnx.graph.onnx_helper import get_name_to_node_map @@ -186,6 +187,13 @@ def get_weight( weight_tensor = get_tensor_value(model, weight_name) return Tensor(weight_tensor) + def matmul_has_transposed_activations(self, matmul: NNCFNode, graph: NNCFGraph) -> bool: + if matmul.metatype != metatypes.ONNXGemmMetatype: + return False + act_port_id = self.get_activation_port_id(matmul, graph) + trans_attr = "transB" if act_port_id else "transA" + return matmul.layer_attributes.node_attrs[trans_attr] + def get_weight_dtype( self, node_with_weight: NNCFNode, weight_port_id: int, model: onnx.ModelProto, graph: NNCFGraph ) -> TensorDataType: @@ -301,6 +309,10 @@ def filter_func(point: StatisticPoint) -> bool: return filter_func + @staticmethod + def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: tuple[int]) -> int: + return get_act_quantization_axis(node, port_id) + def insert_adapters( self, wc_params: WeightCompressionParameters, lora_A: Tensor, lora_B: Tensor, int8_lora: bool ) -> None: @@ -503,9 +515,13 @@ def get_ignored_patterns() -> GraphPattern: class ONNXAWQAlgoAlgoBackend(AWQAlgoBackend, ONNXWeightCompressionAlgoBackend): @staticmethod def get_awq_patterns() -> dict[str, Callable]: - return 
get_awq_patterns( - onnx_metatypes.ONNXMatMulMetatype, onnx_metatypes.ONNXMulLayerMetatype, ATOMIC_ACTIVATIONS_OPERATIONS - ) + patterns = {} + for mm_metatype in (onnx_metatypes.ONNXMatMulMetatype, onnx_metatypes.ONNXGemmMetatype): + p = get_awq_patterns(mm_metatype, onnx_metatypes.ONNXMulLayerMetatype, ATOMIC_ACTIVATIONS_OPERATIONS) + p = {f"{mm_metatype.__name__}_{k}": v for k, v in p.items()} + patterns.update(p) + + return patterns @staticmethod def scale_insertion_command( diff --git a/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py index 1b3eb386d36..56c5282d2e0 100644 --- a/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py @@ -13,7 +13,6 @@ import openvino as ov from openvino import opset13 as opset -import nncf from nncf.common.graph import NNCFGraph from nncf.common.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype @@ -35,6 +34,7 @@ from nncf.openvino.graph.node_utils import convert_op from nncf.openvino.graph.node_utils import create_ov_codebook_subgraph from nncf.openvino.graph.node_utils import create_ov_const_from_tensor +from nncf.openvino.graph.node_utils import get_activation_channel_axis from nncf.openvino.graph.node_utils import get_const_value_as_numpy_tensor from nncf.openvino.graph.node_utils import get_const_value_as_ov_tensor from nncf.openvino.graph.node_utils import get_weight_channel_axes @@ -119,9 +119,6 @@ def mean_statistic_collector( @staticmethod def get_activation_port_id(node: NNCFNode, nncf_graph: NNCFGraph) -> int: - if node.layer_attributes.input_attributes["transpose"]: - msg = "Transposed input is not supported" - raise nncf.UnsupportedModelError(msg) constant_ports = node.layer_attributes.get_const_port_ids() activation_ports = [ e.input_port_id for e in nncf_graph.get_input_edges(node) if e.input_port_id not in constant_ports @@ -143,6 +140,11 @@ def get_weight(self, node_with_weight: NNCFNode, weight_port_id: int, model: ov. 
weight_tensor = get_const_value_as_numpy_tensor(weight_node) return Tensor(weight_tensor) + def matmul_has_transposed_activations(self, matmul: NNCFNode, graph: NNCFGraph) -> bool: + if matmul.metatype != om.OVMatMulMetatype: + return False + return matmul.layer_attributes.input_attributes["transpose"] + def get_weight_dtype( self, node_with_weight: NNCFNode, weight_port_id: int, model: ov.Model, graph: NNCFGraph ) -> TensorDataType: @@ -378,6 +380,10 @@ def get_ignored_patterns() -> GraphPattern: pattern.add_pattern_alternative(create_sam_pe()) return pattern + @staticmethod + def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: tuple[int]) -> int: + return get_activation_channel_axis(node, port_id, input_shape) + class OVTensorWeightCompressionAlgoBackend(OVWeightCompressionAlgoBackend): """ diff --git a/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py b/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py index 92ef97364ef..d953a284c06 100644 --- a/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py +++ b/src/nncf/quantization/algorithms/weight_compression/scale_estimation.py @@ -139,6 +139,10 @@ def apply( continue _, weight_port_id = weight_data[0] + if self._backend_entity.matmul_has_transposed_activations(wp.node_with_weight, graph): + msg = "Transposed activations are not supported yet for the Scale Estimation algorithm" + raise nncf.UnsupportedModelError(msg) + weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph) scale, zero_point = self.calculate_quantization_params( diff --git a/src/nncf/quantization/algorithms/weight_compression/torch_backend.py b/src/nncf/quantization/algorithms/weight_compression/torch_backend.py index 143ec0d7173..eb142c032b4 100644 --- a/src/nncf/quantization/algorithms/weight_compression/torch_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/torch_backend.py @@ -60,6 +60,7 @@ from nncf.torch.model_graph_manager import get_module_by_name from nncf.torch.model_graph_manager import get_weight_compression_reduction_axes from nncf.torch.model_graph_manager import split_const_name +from nncf.torch.node_utils import get_activation_channel_axis as get_activation_channel_axis_util from nncf.torch.quantization.ignored_patterns import create_rope from nncf.torch.quantization.ignored_patterns import create_sam_pe from nncf.torch.quantization.layers import QUANTIZATION_MODULES @@ -175,6 +176,9 @@ def get_weight( raise nncf.InternalError(msg) return Tensor(weight) + def matmul_has_transposed_activations(self, matmul: NNCFNode, graph: NNCFGraph) -> bool: + return False + def get_weight_dtype( self, node_with_weight: NNCFNode, @@ -482,6 +486,10 @@ def get_ignored_patterns() -> GraphPattern: pattern.add_pattern_alternative(create_sam_pe()) return pattern + @staticmethod + def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: tuple[int]) -> int: + return get_activation_channel_axis_util(node, port_id) + class PTAWQAlgoAlgoBackend(AWQAlgoBackend, PTWeightCompressionAlgoBackend): @staticmethod diff --git a/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py b/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py index 8b219118bf7..2182c85b6bf 100644 --- a/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py +++ b/src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py @@ -56,6 +56,7 @@ from nncf.torch.model_graph_manager import get_const_node 
 from nncf.torch.model_graph_manager import get_weight_compression_reduction_axes
 from nncf.torch.model_graph_manager import get_weight_tensor_port_ids
+from nncf.torch.node_utils import get_activation_channel_axis as get_activation_channel_axis_fn
 from nncf.torch.quantization.ignored_patterns import create_rope
 from nncf.torch.quantization.ignored_patterns import create_sam_pe
 from nncf.torch.quantization.layers import INT4AsymmetricWeightsDecompressor
@@ -127,6 +128,9 @@ def get_weight(
         return Tensor(weight)
+    def matmul_has_transposed_activations(self, matmul: NNCFNode, graph: NNCFGraph) -> bool:
+        return False
+
     def get_weight_dtype(
         self, node_with_weight: NNCFNode, weight_port_id: int, model: torch.fx.GraphModule, graph: NNCFGraph
     ) -> TensorDataType:
@@ -262,6 +266,10 @@ def get_ignored_patterns() -> GraphPattern:
         pattern.add_pattern_alternative(create_sam_pe())
         return pattern
+    @staticmethod
+    def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: tuple[int]) -> int:
+        return get_activation_channel_axis_fn(node, port_id)
+
 class FXMixedPrecisionAlgoBackend(MixedPrecisionAlgoBackend, FXWeightCompressionAlgoBackend):
     pass
diff --git a/src/nncf/torch/node_utils.py b/src/nncf/torch/node_utils.py
new file mode 100644
index 00000000000..7f325834557
--- /dev/null
+++ b/src/nncf/torch/node_utils.py
@@ -0,0 +1,42 @@
+# Copyright (c) 2026 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import nncf
+import nncf.torch.graph.operator_metatypes as op
+from nncf.common.graph import NNCFNode
+from nncf.torch.graph.operator_metatypes import PTAddmmMetatype
+from nncf.torch.graph.operator_metatypes import PTMatMulMetatype
+
+
+def get_activation_channel_axis(node: NNCFNode, port_id: int) -> int:
+    """
+    Returns the axis number of the activation tensor which corresponds to its channel.
+
+    :param node: NNCFNode instance.
+    :param port_id: Port ID for input.
+    :return: Channel axis number.
+    """
+    if node.metatype not in op.CONVOLUTION_METATYPES + op.MATMUL_METATYPES:
+        msg = f"Activation channel axis retrieval from node with metatype {node.metatype} is not supported"
+        raise nncf.InternalError(msg)
+
+    if node.metatype not in [PTMatMulMetatype, PTAddmmMetatype]:
+        return node.metatype.output_channel_axis
+
+    if port_id == 0:
+        # X(port:0) * W(port:1): [..., C_IN] * [... , C_IN, C_OUT]
+        return -1
+    if port_id == 1:
+        # W(port:0) * X(port:1): [... , C_OUT, C_IN] * [... , C_IN, ...]
+        return -2
+
+    msg = f"Port id for a {node.metatype} operation is expected to be in [0, 1], {port_id} received"
+    raise nncf.InternalError(msg)
diff --git a/tests/cross_fw/test_templates/template_test_weights_compression.py b/tests/cross_fw/test_templates/template_test_weights_compression.py
index 3e6aa4802eb..d5ced55ff8d 100644
--- a/tests/cross_fw/test_templates/template_test_weights_compression.py
+++ b/tests/cross_fw/test_templates/template_test_weights_compression.py
@@ -11,6 +11,7 @@
 import math
 from abc import ABC
 from abc import abstractmethod
+from dataclasses import dataclass
 from functools import reduce
 from operator import mul
 from typing import Any, Callable, Optional, TypeVar
@@ -30,6 +31,7 @@
 from nncf.quantization import compress_weights
 from nncf.quantization.advanced_parameters import AdvancedAWQParameters as AWQParams
 from nncf.quantization.advanced_parameters import AdvancedCompressionParameters as CompressionParams
+from nncf.quantization.advanced_parameters import AdvancedGPTQParameters as GPTQParams
 from nncf.quantization.algorithms.weight_compression.activation_stats import WCTensorStatistic
 from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats
 from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression
@@ -162,7 +164,7 @@ def test_data_based_criterion(self, mode, ref_score, ref_act_score, mocker):
     @staticmethod
     @abstractmethod
-    def get_sequential_matmul_model() -> TModel:
+    def get_sequential_matmul_model(transpose_a: bool) -> TModel:
         """Returns a backend model for test_mixed_precision."""
     @staticmethod
@@ -172,7 +174,7 @@ def to_tensor(x: TTensor) -> TTensor:
     @staticmethod
     @abstractmethod
-    def check_weights(model: TModel, ref_ids: list[int]) -> None:
+    def check_weights(model: TModel, ref_ids: list[int], transpose_a=False) -> None:
         """Checks that only weights with specified ids are compressed in int4 format."""
     @staticmethod
@@ -210,10 +212,14 @@ def wrap_model(model, data) -> CompressionParams:
             (SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE, False, 0.8, [0, 1, 2]),
         ),
     )
-    def test_mixed_precision(self, mode, all_layers, ratio, ref_ids, mocker):
-        model = self.get_sequential_matmul_model()
-        first = self.to_tensor(np.ones([1, 4, 4], dtype=np.float32))
-        second = self.to_tensor(np.arange(16, dtype=np.float32)).reshape(1, 4, 4)
+    @pytest.mark.parametrize("transpose_a", (False, True))
+    def test_mixed_precision(self, mode, all_layers, ratio, ref_ids, transpose_a, transpose_a_supported, mocker):
+        if transpose_a and not transpose_a_supported:
+            pytest.skip("transpose_a is not supported for the current backend")
+        model = self.get_sequential_matmul_model(transpose_a=transpose_a)
+        input_shape = (4, 4) if transpose_a else (1, 4, 4)
+        first = self.to_tensor(np.ones(input_shape, dtype=np.float32))
+        second = self.to_tensor(np.arange(16, dtype=np.float32)).reshape(input_shape)
         dataset = Dataset([first, second], self.get_transform_func())
         compressed_model = compress_weights(
             model,
@@ -224,7 +230,7 @@ def test_mixed_precision(self, mode, all_layers, ratio, ref_ids, mocker):
             sensitivity_metric=mode,
             dataset=dataset,
         )
-        self.check_weights(compressed_model, ref_ids)
+        self.check_weights(compressed_model, ref_ids, transpose_a)
     # Scale Estimation Tests
@@ -382,7 +388,7 @@ def test_call_max_var_criterion_with_dataset_by_default_awq_act_matmul(self, int
     @staticmethod
     @abstractmethod
-    def get_awq_model() -> TModel:
+    def get_awq_model(non_mergable_pattern: bool) -> TModel:
        "Returns a backend model for
test_awq_with_ignored_scope."
     @staticmethod
@@ -406,7 +412,7 @@ def get_ignored_scope_name() -> str:
         "Returns ignored scope name for test_awq_with_ignored_scope."
     def test_awq_with_ignored_scope(self, mocker):
-        model = self.get_awq_model()
+        model = self.get_awq_model(non_mergable_pattern=False)
         sz = 8
         n_samples = 10
@@ -473,29 +479,56 @@ def test_sam_pe_weight_compression(self):
     @staticmethod
     @abstractmethod
-    def get_reference_for_test_awq_scale_reference() -> dict[str, Tensor]:
+    @pytest.fixture
+    def test_awq_scale_ref() -> dict[str, Tensor]:
         "Returns reference for test_awq_scale_reference."
-    def test_awq_scale_reference(self, monkeypatch, mocker):
+    @abstractmethod
+    @pytest.fixture
+    def transpose_a_supported(self) -> bool:
+        """True if the backend supports transposed MatMul activations, False otherwise"""
+
+    # Transposed inputs do not affect the mergeable pattern code, skipping (True, False)
+    @pytest.mark.parametrize("transpose_a,non_mergable_pattern", [(True, True), (False, True), (False, False)])
+    def test_awq_scale_reference(
+        self,
+        non_mergable_pattern,
+        transpose_a,
+        test_awq_scale_ref,
+        transpose_a_supported,
+        monkeypatch,
+        mocker,
+    ):
         monkeypatch.setattr("nncf.quantization.algorithms.weight_compression.algorithm.AWQ", SpyAWQ)
-        model = self.get_awq_model()
+        if transpose_a:
+            if not transpose_a_supported:
+                msg = "transpose_a is not supported for the current backend"
+                pytest.skip(msg)
-        input = 0.01 * np.arange(0, 4 * 8, dtype=np.float32).reshape(1, 4, 8) + 0.02
+            INPUT_SHAPE = (2, 4)
+            model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
+        else:
+            INPUT_SHAPE = (1, 4, 8)
+            model = self.get_awq_model(non_mergable_pattern)
+        input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
         input = self.to_tensor(input)
-        dataset = Dataset([input], self.get_transform_func())
+        dataset = Dataset([input] * 2, self.get_transform_func())
         with SpyWeightCompressionStatisticsContext(mocker):
             _ = compress_weights(
                 model,
                 mode=CompressWeightsMode.INT4_SYM,
                 ratio=1.0,
+                all_layers=transpose_a,
                 group_size=-1,
                 dataset=dataset,
                 awq=True,
             )
         assert spy_instance is not None
         for node_name, scales in spy_instance._scale_per_target_node.items():
-            assert fns.allclose(scales, self.get_reference_for_test_awq_scale_reference()[node_name])
+            ref = test_awq_scale_ref[node_name]
+            assert fns.allclose(scales, ref)
+            assert scales.shape == ref.shape
     @pytest.mark.parametrize(
         ["group_size", "fallback_mode", "min_adjusted_group_size", "expected_outcome"],
@@ -662,45 +695,127 @@ def get_transform_func() -> Optional[Callable[..., Any]]:
     def get_reduction_axes() -> int:
         return 1
+    @dataclass
+    class ProcessStatsTestCase:
+        reduced_shape: tuple[int, ...]
+ activation_shapes: list[tuple[int, ...]] + subset_size: int + ref_s: np.ndarray + ref_X: np.ndarray + act_ch_axis: Optional[int] = None + @pytest.mark.parametrize( - "mean_values_shape,num_samples,subset_size,expected_s_shape,expected_X_shape,expected_indices", + "case", [ # 2D Activations - ((8,), 10, 5, (8,), (8, 5), [0, 2, 4, 6, 8]), - ((8,), 5, 10, (8,), (8, 5), [0, 1, 2, 3, 4]), - ((8,), 12, 5, (8,), (8, 6), [0, 2, 4, 6, 8, 10]), + ProcessStatsTestCase( + reduced_shape=(2,), + activation_shapes=[(1, 2), (3, 2), (5, 2), (10, 2)], + subset_size=2, + ref_s=np.array([6, 7]), + ref_X=np.array([6, 2, 7, 3]).reshape(2, 2), + ), + ProcessStatsTestCase( + reduced_shape=(2,), + activation_shapes=[(2, 1), (2, 3), (2, 5), (2, 10)], + subset_size=2, + act_ch_axis=0, + ref_s=np.array([6, 7]), + ref_X=np.array([6, 2, 7, 3]).reshape(2, 2), + ), + ProcessStatsTestCase( + reduced_shape=(2,), + activation_shapes=[(5, 2), (5, 2)], + subset_size=2, + ref_s=np.array([2, 3]), + ref_X=np.array([0, 2, 1, 3]).reshape(2, 2), + ), # 3D Activations - ((4, 8), 10, 5, (4, 8), (4, 8, 5), [0, 2, 4, 6, 8]), - ((4, 8), 5, 10, (4, 8), (4, 8, 5), [0, 1, 2, 3, 4]), - ((4, 8), 25, 8, (4, 8), (4, 8, 9), [0, 3, 6, 9, 12, 15, 18, 21, 24]), + ProcessStatsTestCase( + reduced_shape=(2, 4), + activation_shapes=[(1, 2, 4), (3, 2, 4), (5, 2, 4), (10, 2, 4)], + subset_size=2, + ref_s=np.array(list(range(24, 32))).reshape(2, 4), + ref_X=np.array([24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15]).reshape(2, 4, 2), + ), + ProcessStatsTestCase( + reduced_shape=(2, 4), + activation_shapes=[(1, 100000, 2, 4), (3, 10000, 2, 4), (5, 1000, 2, 4), (10, 5, 2, 4)], + subset_size=2, + act_ch_axis=1, + ref_s=np.array(list(range(24, 32))).reshape(2, 4), + ref_X=np.array([24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15]).reshape(2, 4, 2), + ), + ProcessStatsTestCase( + reduced_shape=(2, 4), + activation_shapes=[(1, 2, 4), (1, 2, 4)], + subset_size=2, + ref_s=np.array(list(range(8, 16))).reshape(2, 4), + ref_X=np.array([0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15]).reshape(2, 4, 2), + ), ], ) - def test_process_stats( - self, mean_values_shape, num_samples, subset_size, expected_s_shape, expected_X_shape, expected_indices - ): - total_elements = reduce(mul, mean_values_shape, 1) + def test_process_stats(self, case: ProcessStatsTestCase): + total_elements = reduce(mul, case.reduced_shape, 1) mean_values = [ - Tensor(np.arange(i * total_elements, (i + 1) * total_elements, dtype=np.float32).reshape(mean_values_shape)) - for i in range(num_samples) + Tensor( + np.arange(i * total_elements, (i + 1) * total_elements, dtype=np.float32).reshape(case.reduced_shape) + ) + for i in range(len(case.activation_shapes)) ] - shape_values = [(1,) + mean_values_shape for _ in range(num_samples)] - stats = WCTensorStatistic(mean_values=mean_values, shape_values=shape_values) + stats = WCTensorStatistic(mean_values=mean_values, shape_values=case.activation_shapes) - s, X = process_stats(stats, subset_size) + if case.act_ch_axis is None: + s, X = process_stats(stats, case.subset_size) + else: + s, X = process_stats(stats, case.subset_size, case.act_ch_axis) - assert s.shape == expected_s_shape, f"Expected s shape {expected_s_shape}, got {s.shape}" - assert X.shape == expected_X_shape, f"Expected X shape {expected_X_shape}, got {X.shape}" + assert s.shape == case.ref_s.shape + assert fns.allclose(s, self.to_tensor(case.ref_s)) + assert X.shape == case.ref_X.shape + assert fns.allclose(X, self.to_tensor(case.ref_X)) - X_full_list = 
[mean_values[i] for i in range(num_samples)] - X_full = fns.stack(X_full_list) - axes = list(range(1, len(X_full.shape))) + [0] - X_full_transposed = fns.transpose(X_full, axes=axes) + @staticmethod + @abstractmethod + def get_transposable_awq_model(transpose_a: bool, transpose_b: bool, input_shape=None) -> TModel: + "Returns a backend model for test_compression_with_transpose." - for idx, sample_idx in enumerate(expected_indices): - expected_sample = X_full_transposed[..., sample_idx] - actual_sample = X[..., idx] - assert fns.all(actual_sample == expected_sample) + @pytest.mark.parametrize( + "kwargs", + [ + dict(scale_estimation=True), + dict(lora_correction=True), + dict( + gptq=True, + advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)), + ), + ], + ) + def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs): + if not transpose_a_supported: + pytest.skip("transpose_a is not supported for the current backend") + if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms(): + pytest.skip("Scale estimation is not supported") + if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms(): + pytest.skip("GPTQ is not supported") + if kwargs.get("lora_correction", False) and "lora_correction" in self.get_not_supported_algorithms(): + pytest.skip("lora_correction is not supported") + + INPUT_SHAPE = (2, 4) + model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE) + input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02 + input = self.to_tensor(input) + dataset = Dataset([input] * 2, self.get_transform_func()) - expected_s = fns.max(fns.abs(X_full_transposed), axis=-1) - assert fns.all(s == expected_s) + with pytest.raises(nncf.UnsupportedModelError): + compress_weights( + model, + mode=CompressWeightsMode.INT4_SYM, + ratio=1.0, + group_size=1, + subset_size=2, + dataset=dataset, + all_layers=True, + **kwargs, + ) diff --git a/tests/onnx/common.py b/tests/onnx/common.py index 803e6dafdcb..7584c6afa70 100644 --- a/tests/onnx/common.py +++ b/tests/onnx/common.py @@ -106,6 +106,7 @@ def add_gemm( output: Optional[str] = None, weight_data: Optional[np.ndarray] = None, bias_data: Optional[np.ndarray] = None, + trans_a: int = 0, trans_b: int = 0, ) -> str: i = len(self._nodes) @@ -140,7 +141,7 @@ def add_gemm( inputs=[input, w_name, b_name], outputs=[output], name=f"Gemm_{i}", - transA=0, + transA=trans_a, transB=trans_b, ) ) diff --git a/tests/onnx/quantization/test_weights_compression.py b/tests/onnx/quantization/test_weights_compression.py index 365e9653284..46931d5afac 100644 --- a/tests/onnx/quantization/test_weights_compression.py +++ b/tests/onnx/quantization/test_weights_compression.py @@ -414,30 +414,65 @@ def get_SAM_PE_model() -> onnx.ModelProto: return mb.build() @staticmethod - def get_sequential_matmul_model() -> onnx.ModelProto: + def get_sequential_matmul_model(transpose_a: bool) -> onnx.ModelProto: """ Builds a model to be used in the TemplateWeightCompression.test_mixed_precision() test. 
""" mb = ModelBuilder() - x = mb.add_input("input", (1, 4, 4)) - output = mb.add_output("output", (1, 4, 4)) + input_shape = (4, 4) if transpose_a else (1, 4, 4) + x = mb.add_input("input", input_shape) main_values = [10000, 1000, 1, 10, 10000] + + if transpose_a: + x = mb.add_transpose(x, [1, 0]) for i, main_value in enumerate(main_values): weights_data = np.arange(0, 16).reshape(4, 4).astype(np.float32) weights_data[-1, -1] = main_value weights_data = weights_data.T - x = mb.add_matmul(x, shape=weights_data.shape, output=output if i == 4 else None, data=weights_data) + if transpose_a: + x = mb.add_gemm(x, shape=weights_data.shape, weight_data=weights_data) + # Without additional output there is no edges between gemms in the graph + # for some odd reason + mb.add_output(x, input_shape) + else: + x = mb.add_matmul(x, shape=weights_data.shape, data=weights_data) + if i == 4: + mb.add_output(x, input_shape) return mb.build(opset_version=21) + @staticmethod + def get_transposable_awq_model(transpose_a: bool, transpose_b: bool, input_shape=None): + mb = ModelBuilder() + + assert len(input_shape) == 2 + input_shape = input_shape or (2, 3) + x = mb.add_input("input", input_shape) + output = mb.add_output("output", input_shape) + + inp_ch_idx = -2 if transpose_a else -1 + w_shape = (input_shape[inp_ch_idx], input_shape[inp_ch_idx]) + w_data = 0.1 * np.arange(0, np.prod(w_shape), dtype=np.float32).reshape(w_shape) + 0.05 + w_data = w_data.T + + relu = mb.add_relu(x) + mb.add_gemm( + relu, w_data.shape, weight_data=w_data, trans_a=int(transpose_a), trans_b=int(transpose_b), output=output + ) + model = mb.build() + return model + @staticmethod def to_tensor(x: np.ndarray) -> np.ndarray: return np.array(x) @staticmethod - def check_weights(model: onnx.ModelProto, ref_ids: list[int]) -> None: + def check_weights(model: onnx.ModelProto, ref_ids: list[int], transpose_a: bool = False) -> None: names = {i.name for i in model.graph.initializer if i.data_type == onnx.TensorProto.INT4} + if transpose_a: + # First transpose node increments weights indexes + ref_ids = [i + 1 for i in ref_ids] low_precision_nodes = {f"W_{i}_quantized" for i in ref_ids} assert low_precision_nodes == names @@ -696,7 +731,7 @@ def get_num_multiply_from_awq(model: onnx.ModelProto) -> int: return awq_num @staticmethod - def get_awq_model() -> onnx.ModelProto: + def get_awq_model(non_mergable_pattern: bool) -> onnx.ModelProto: """ Builds a model to be used in the following tests: - TemplateWeightCompression.test_awq_with_ignored_scope() @@ -713,11 +748,17 @@ def get_awq_model() -> onnx.ModelProto: w_data = w_data.T num_blocks = 2 + for i in range(num_blocks): - a = mb.add_matmul(x, shape=w_data.shape, data=w_data) - b = mb.add_matmul(x, shape=w_data.shape, data=w_data) - x = mb.add_mul(a, b) - x = mb.add_matmul(x, shape=w_data.shape, output=output if i == num_blocks - 1 else None, data=w_data) + if non_mergable_pattern: + a = mb.add_matmul(x, shape=w_data.shape, data=w_data) + b = mb.add_relu(a) + x = mb.add_matmul(b, shape=w_data.shape, output=output if i == num_blocks - 1 else None, data=w_data) + else: + a = mb.add_matmul(x, shape=w_data.shape, data=w_data) + b = mb.add_matmul(x, shape=w_data.shape, data=w_data) + x = mb.add_mul(a, b) + x = mb.add_matmul(x, shape=w_data.shape, output=output if i == num_blocks - 1 else None, data=w_data) return mb.build() @@ -764,14 +805,35 @@ def get_ignored_scope_name() -> str: return "MatMul_4" # Zero-based indices (e.g., MatMul_0, MatMul_1, ...) 
@staticmethod - def get_reference_for_test_awq_scale_reference() -> dict[str, Tensor]: + @pytest.fixture + def test_awq_scale_ref() -> dict[str, Tensor]: return { + "Gemm_1": Tensor(np.array([[14.299703], [8.364688]], dtype=np.float32)), "MatMul_3": Tensor( np.array( [[1.2264546, 1.2054994, 1.1413403, 1.0974358, 1.0643553, 1.0379708, 1.0161183, 0.9975262]], dtype=np.float32, - ).T - ) + ) + ), + "MatMul_2": Tensor( + np.array( + [ + [ + [ + 1.9909902, + 1.8632966, + 1.5759803, + 1.3974594, + 1.2722752, + 1.1779976, + 1.1035581, + 1.042768, + ] + ] + ], + dtype=np.float32, + ), + ), } @staticmethod @@ -784,3 +846,7 @@ def transform_func(x): @staticmethod def get_reduction_axes() -> int: return 0 + + @pytest.fixture + def transpose_a_supported(self) -> bool: + return True diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py index 82c09759db8..db26d61a77e 100644 --- a/tests/openvino/native/models.py +++ b/tests/openvino/native/models.py @@ -614,7 +614,9 @@ def _create_ov_model(self, input_name) -> ov.Model: @SYNTHETIC_MODELS.register() class IntegerModel(OVReferenceModel): - def _create_ov_model(self, dim1=1, dim2=7, dim3=6, max_input_value=2, add_batch_dimension=False, positive_w=True): + def _create_ov_model( + self, dim1=1, dim2=7, dim3=6, max_input_value=2, add_batch_dimension=False, positive_w=True, transpose_a=False + ): def get_rand_w(shape): value = self._rng.random(shape) return value if positive_w else value * 2 - 1 @@ -643,7 +645,11 @@ def get_rand_w(shape): gather_4.set_friendly_name("Gather_4") matmul_2_data = opset.constant(get_rand_w((dim3, dim2)), dtype=np.float32, name="matmul_2_data") - matmul_2 = opset.matmul(gather_4, matmul_2_data, transpose_a=False, transpose_b=True, name="MatMul_2") + if transpose_a: + transpose = opset.transpose(gather_4, [1, 0]) + else: + transpose = gather_4 + matmul_2 = opset.matmul(transpose, matmul_2_data, transpose_a=transpose_a, transpose_b=True, name="MatMul_2") add_1 = opset.add(matmul_1, matmul_2, name="Add_1") result = opset.result(add_1, name="Result") @@ -812,17 +818,23 @@ class SequentialMatmulModel(OVReferenceModel): rel_error= 0.03 """ - def _create_ov_model(self, mm_hidden_dim=4): - input_node = opset.parameter([1, 4, mm_hidden_dim], name="Input_1") + def _create_ov_model(self, mm_hidden_dim=4, transpose_a: bool = False): + # Make 2d inputs for transposed model + # to allign with onnx ref model + if transpose_a: + input_node = opset.parameter([4, mm_hidden_dim], name="Input_1") + last_node = opset.transpose(input_node, input_order=[1, 0]) + else: + input_node = opset.parameter([1, 4, mm_hidden_dim], name="Input_1") + last_node = input_node main_values = [10000, 1000, 1, 10, 10000] - last_node = input_node for i, main_value in enumerate(main_values): weights_data = np.arange(0, mm_hidden_dim**2).reshape(mm_hidden_dim, mm_hidden_dim) weights_data[-1, -1] = main_value current_weights = opset.constant(weights_data, dtype=np.float32, name=f"weights_{i}") current_node = opset.matmul( - last_node, current_weights, transpose_a=False, transpose_b=True, name=f"MatMul_{i}" + last_node, current_weights, transpose_a=transpose_a, transpose_b=True, name=f"MatMul_{i}" ) last_node = current_node @@ -1003,7 +1015,7 @@ def get_weights(weights_data, is_int8, name): ) return (qw - zp) * scale - def _create_ov_model(self, n_extra_dims: int = 1, is_int8=False): + def _create_ov_model(self, n_extra_dims: int = 1, is_int8=False, non_mergable_pattern: bool = False): input_node = opset.parameter([1] * n_extra_dims + [-1, 8], 
name="Input_1") weights_data1 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 @@ -1012,13 +1024,16 @@ def _create_ov_model(self, n_extra_dims: int = 1, is_int8=False): weights_data2 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 weights2 = self.get_weights(weights_data2, is_int8, name="weights_2") - node2 = opset.matmul(input_node, weights2, transpose_a=False, transpose_b=True, name="MatMul_2") - - node_multiply = opset.multiply(node1, node2, name="Multiply") + if non_mergable_pattern: + relu = opset.relu(node1) + node3 = opset.matmul(relu, weights2, transpose_a=False, transpose_b=True, name="MatMul_2") + else: + node2 = opset.matmul(input_node, weights2, transpose_a=False, transpose_b=True, name="MatMul_2") + node_multiply = opset.multiply(node1, node2, name="Multiply") - weights_data3 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 - weights3 = self.get_weights(weights_data3, is_int8, name="weights_3") - node3 = opset.matmul(node_multiply, weights3, transpose_a=False, transpose_b=True, name="MatMul_3") + weights_data3 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 + weights3 = self.get_weights(weights_data3, is_int8, name="weights_3") + node3 = opset.matmul(node_multiply, weights3, transpose_a=False, transpose_b=True, name="MatMul_3") weights_data4 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 weights4 = self.get_weights(weights_data4, is_int8, name="weights_4") @@ -1026,13 +1041,18 @@ def _create_ov_model(self, n_extra_dims: int = 1, is_int8=False): weights_data5 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 weights5 = self.get_weights(weights_data5, is_int8, name="weights_5") - node5 = opset.matmul(node3, weights5, transpose_a=False, transpose_b=True, name="MatMul_5") - node_multiply_2 = opset.multiply(node4, node5, name="Multiply_2") + if non_mergable_pattern: + relu = opset.relu(node4) + node6 = opset.matmul(relu, weights5, transpose_a=False, transpose_b=True, name="MatMul_6") + else: + node5 = opset.matmul(node3, weights5, transpose_a=False, transpose_b=True, name="MatMul_5") + + node_multiply_2 = opset.multiply(node4, node5, name="Multiply_2") - weights_data6 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 - weights6 = self.get_weights(weights_data6, is_int8, name="weights_6") - node6 = opset.matmul(node_multiply_2, weights6, transpose_a=False, transpose_b=True, name="MatMul_6") + weights_data6 = 0.01 * np.arange(0, 64).reshape(8, 8) + 0.05 + weights6 = self.get_weights(weights_data6, is_int8, name="weights_6") + node6 = opset.matmul(node_multiply_2, weights6, transpose_a=False, transpose_b=True, name="MatMul_6") result = opset.result(node6, name="Result") result.get_output_tensor(0).set_names(set(["Result"])) @@ -1084,6 +1104,43 @@ def _create_ov_model(self, is_int8=False, with_multiply=False, n_layers=8): return model +class AWQModel(OVReferenceModel): + OUTPUT_DIM = 32 + HIDDEN_DIM = 16 + INPUT_SHAPE = [1, 24, HIDDEN_DIM] # [B, SeqLen, HiddenDim] + + def _create_ov_model( + self, + transpose_a: bool = False, + transpose_b: bool = True, + input_shape: Optional[list[int]] = None, + is_int8=False, + ): + self._input_shape = self.INPUT_SHAPE if input_shape is None else input_shape + hdim_axis = -2 if transpose_a else -1 + self._hidden_dim = self._input_shape[hdim_axis] + input_1 = opset.parameter(self._input_shape, name="Input") + weight_shape = self.get_weight_shape(transpose_b) + data = self._rng.random(weight_shape).astype(np.float32) + + weights = AWQMatmulModel.get_weights(data, is_int8=is_int8, name="weights_1") + + relu = opset.relu(input_1) + matmul = opset.matmul(relu, 
weights, transpose_a=transpose_a, transpose_b=transpose_b, name="MatMul") + + result = opset.result(matmul, name="Result") + result.get_output_tensor(0).set_names(set(["Result"])) + model = ov.Model([result], [input_1]) + return model + + @property + def hidden_dim(self): + return self._hidden_dim + + def get_weight_shape(self, transpose_b: bool = True): + return [self.OUTPUT_DIM, self.hidden_dim] if transpose_b else [self.hidden_dim, self.OUTPUT_DIM] + + class AWQModel_fp16_overlow(OVReferenceModel): """ Model for testing AWQ algorithm with fp16 overflow fix. diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index 539ced6085a..dd58ff279a3 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -12,7 +12,7 @@ import inspect import os from collections import defaultdict -from typing import Callable +from typing import Callable, Optional from unittest.mock import patch import numpy as np @@ -64,6 +64,7 @@ from tests.openvino.native.common import get_actual_reference_for_current_openvino from tests.openvino.native.models import AWQActMatmulModel from tests.openvino.native.models import AWQMatmulModel +from tests.openvino.native.models import AWQModel from tests.openvino.native.models import AWQModel_fp16_overlow from tests.openvino.native.models import DifferentChannelSizeMatmulModel from tests.openvino.native.models import GatherAndMatmulShareData @@ -104,7 +105,9 @@ class LMLinearModel(OVReferenceModel): HIDDEN_DIM = 16 INPUT_SHAPE = [1, 24, HIDDEN_DIM] # [B, SeqLen, HiddenDim] - def _create_ov_model(self, transpose_b: bool = True, transpose_a=False, input_shape=None): + def _create_ov_model( + self, transpose_b: bool = True, transpose_a: bool = False, input_shape: Optional[list[int]] = None + ): self._input_shape = self.INPUT_SHAPE if input_shape is None else input_shape hdim_axis = -2 if transpose_a else -1 self._hidden_dim = self._input_shape[hdim_axis] @@ -1972,38 +1975,6 @@ def test_compression_with_different_algo_combinations(input_shape, kwargs): ) -@pytest.mark.parametrize( - "kwargs", - [ - dict(scale_estimation=True), - dict(lora_correction=True), - dict( - gptq=True, - awq=True, - scale_estimation=True, - advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)), - ), - ], -) -def test_compression_with_transposed_activations(kwargs): - dataset_size = 4 - model = LMLinearModel(transpose_a=True, transpose_b=False).ov_model - input_data = [np.ones(inp.shape) for inp in model.inputs] * dataset_size - dataset = Dataset(input_data) - - with pytest.raises(nncf.UnsupportedModelError): - compress_weights( - model, - mode=CompressWeightsMode.INT4_SYM, - ratio=1.0, - group_size=8, - subset_size=2, - dataset=dataset, - all_layers=True, - **kwargs, - ) - - @pytest.mark.parametrize("disabled", [False, True]) def test_disabled_optimized_compression(disabled): hidden_dim = (MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION // LMLinearModel.OUTPUT_DIM) + 1 @@ -2190,8 +2161,8 @@ def get_SAM_PE_model() -> ov.Model: return SAMPEModel().ov_model @staticmethod - def get_sequential_matmul_model() -> ov.Model: - return SequentialMatmulModel().ov_model + def get_sequential_matmul_model(transpose_a: bool) -> ov.Model: + return SequentialMatmulModel(transpose_a=transpose_a).ov_model @staticmethod def get_model_for_test_scale_estimation(): @@ -2202,8 +2173,8 @@ def get_moe_model_for_test_scale_estimation(): return 
SimpleMoEModel().ov_model @staticmethod - def get_awq_model() -> ov.Model: - return AWQMatmulModel().ov_model + def get_awq_model(non_mergable_pattern: bool) -> ov.Model: + return AWQMatmulModel(non_mergable_pattern=non_mergable_pattern).ov_model @staticmethod def get_different_channel_size_model(channel_sizes: list[int]) -> ov.Model: @@ -2213,6 +2184,11 @@ def get_different_channel_size_model(channel_sizes: list[int]) -> ov.Model: def get_awq_act_model(with_multiply, n_layers): return AWQActMatmulModel(with_multiply=with_multiply, n_layers=n_layers).ov_model + @staticmethod + def get_transposable_awq_model(transpose_a, transpose_b, input_shape=None): + ov_model = AWQModel(transpose_a=transpose_a, transpose_b=transpose_b, input_shape=input_shape).ov_model + return ov_model + @staticmethod def to_tensor(x) -> np.ndarray: return np.array(x) @@ -2226,7 +2202,7 @@ def cast_to(x: np.ndarray, dtype: TensorDataType) -> np.ndarray: raise NotImplementedError @staticmethod - def check_weights(model: ov.Model, ref_ids: list[int]) -> None: + def check_weights(model: ov.Model, ref_ids: list[int], transpose_a=False) -> None: names = {op.get_friendly_name() for op in model.get_ordered_ops() if op.get_element_type() == ov.Type.i4} low_precision_nodes = {f"weights_{i}" for i in ref_ids} assert low_precision_nodes == names @@ -2441,12 +2417,24 @@ def get_num_multiply_from_awq(model): return awq_num @staticmethod - def get_reference_for_test_awq_scale_reference() -> dict[str, Tensor]: + @pytest.fixture + def test_awq_scale_ref() -> dict[str, Tensor]: return { + "MatMul": Tensor(np.array([[10.337929], [6.4558873]], dtype=np.float32)), "MatMul_3": Tensor( np.array( [[1.2264546, 1.2054994, 1.1413403, 1.0974358, 1.0643553, 1.0379708, 1.0161183, 0.9975262]], dtype=np.float32, + ).T + ), + "MatMul_2": Tensor( + np.array( + [[[1.9909902, 1.8632966, 1.5759803, 1.3974594, 1.2722752, 1.1779976, 1.1035581, 1.042768]]], + dtype=np.float32, ) - ) + ), } + + @pytest.fixture + def transpose_a_supported(self) -> bool: + return True diff --git a/tests/torch/function_hook/quantization/test_weights_compression.py b/tests/torch/function_hook/quantization/test_weights_compression.py index 7caad2aba73..4f01323b199 100644 --- a/tests/torch/function_hook/quantization/test_weights_compression.py +++ b/tests/torch/function_hook/quantization/test_weights_compression.py @@ -174,16 +174,19 @@ def forward(self, x): class AWQLinearModel(nn.Module): - def __init__(self, is_int8=False): + def __init__(self, non_mergable_pattern: bool = False, is_int8=False): super().__init__() self.is_int8 = is_int8 + self.non_mergable_pattern = non_mergable_pattern self.linear1 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) self.linear2 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) self.linear3 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) self.linear4 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) - self.linear5 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) - self.linear6 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) + + if not non_mergable_pattern: + self.linear5 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) + self.linear6 = self.get_linear_layer(0.01 * torch.arange(0, 64).reshape(8, 8) + 0.05, is_int8) def get_linear_layer(self, weights_data, is_int8): if not is_int8: @@ -200,9 +203,19 @@ def 
get_linear_layer(self, weights_data, is_int8): return linear_layer def forward(self, x): - node1 = self.linear1(x) - node2 = self.linear2(x) - node_multiply = node1 * node2 + if self.non_mergable_pattern: + node1 = self.linear1(x) + y = torch.relu(node1) + node_multiply = self.linear2(y) + else: + node1 = self.linear1(x) + node2 = self.linear2(x) + node_multiply = node1 * node2 + + if self.non_mergable_pattern: + node3 = self.linear3(node_multiply) + y = torch.relu(node3) + return self.linear4(y) node3 = self.linear3(node_multiply) node4 = self.linear4(node3) @@ -500,7 +513,7 @@ def get_SAM_PE_model() -> torch.nn.Module: return SAMPEModel() @staticmethod - def get_sequential_matmul_model() -> torch.nn.Module: + def get_sequential_matmul_model(transpose_a: bool) -> torch.nn.Module: return SequentialMatmulModel() @staticmethod @@ -516,8 +529,8 @@ def get_moe_model_for_test_scale_estimation(): return model @staticmethod - def get_awq_model() -> torch.nn.Module: - return AWQLinearModel() + def get_awq_model(non_mergable_pattern: bool) -> torch.nn.Module: + return AWQLinearModel(non_mergable_pattern=non_mergable_pattern) @staticmethod def get_different_channel_size_model(channel_sizes: list[int]) -> torch.nn.Module: @@ -536,7 +549,7 @@ def cast_to(x: torch.Tensor, dtype: TensorDataType) -> torch.Tensor: return cast_to(x, dtype) @staticmethod - def check_weights(model: torch.nn.Module, ref_ids: list[int]) -> None: + def check_weights(model: torch.nn.Module, ref_ids: list[int], transpose_a=False) -> None: all_names = model.get_weight_names_in_exec_order() low_precision_nodes = list(map(lambda i: all_names[i], ref_ids)) decompressed_modules = list( @@ -746,13 +759,44 @@ def get_num_multiply_from_awq(model): return awq_num @staticmethod - def get_reference_for_test_awq_scale_reference() -> dict[str, Tensor]: + @pytest.fixture + def test_awq_scale_ref() -> dict[str, Tensor]: return { "linear3/linear/0": Tensor( - torch.tensor([[1.226455, 1.205499, 1.141340, 1.097436, 1.064355, 1.037971, 1.016118, 0.997526]]) - ) + torch.tensor( + [[1.226455, 1.205499, 1.141340, 1.097436, 1.064355, 1.037971, 1.016118, 0.997526]], + dtype=torch.float32, + ).T + ), + "linear2/linear/0": Tensor( + torch.tensor( + [ + [ + [ + 1.9909899235, + 1.8632963896, + 1.5759800673, + 1.3974593878, + 1.2722752094, + 1.1779977083, + 1.1035580635, + 1.0427680016, + ] + ] + ], + dtype=torch.float32, + ) + ), } + @staticmethod + def get_transposable_awq_model(transpose_a: bool, transpose_b: bool): + pass + + @pytest.fixture + def transpose_a_supported(self) -> bool: + return False + @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) def test_half_precision_models(dtype): @@ -768,3 +812,7 @@ def test_half_precision_models(dtype): awq=True, dataset=nncf.Dataset([dict(inputs)]), ) + + @pytest.fixture + def tranpose_a_supported() -> bool: + return False diff --git a/tests/torch/fx/test_compress_weights.py b/tests/torch/fx/test_weights_compression.py similarity index 95% rename from tests/torch/fx/test_compress_weights.py rename to tests/torch/fx/test_weights_compression.py index 2d447b94d4c..139492d88f2 100644 --- a/tests/torch/fx/test_compress_weights.py +++ b/tests/torch/fx/test_weights_compression.py @@ -338,7 +338,7 @@ def get_SAM_PE_model() -> torch.fx.GraphModule: return exported_model @staticmethod - def get_sequential_matmul_model() -> torch.fx.GraphModule: + def get_sequential_matmul_model(transpose_a: bool) -> torch.fx.GraphModule: model = SequentialMatmulModel() ex_input = torch.ones([1, 4, 4], 
dtype=torch.float32) exported_model = get_torch_fx_model(model, ex_input) @@ -363,8 +363,8 @@ def get_moe_model_for_test_scale_estimation(): return exported_model @staticmethod - def get_awq_model() -> torch.fx.GraphModule: - model = AWQLinearModel() + def get_awq_model(non_mergable_pattern: bool) -> torch.fx.GraphModule: + model = AWQLinearModel(non_mergable_pattern=non_mergable_pattern) dynamic_shapes = [[None, torch.export.Dim("dynamic_shape"), None]] ex_input = torch.ones([1, 4, 8], dtype=torch.float32) exported_model = get_torch_fx_model(model, ex_input, dynamic_shapes=dynamic_shapes) @@ -393,7 +393,7 @@ def cast_to(x: torch.Tensor, dtype: TensorDataType) -> torch.Tensor: return cast_to(x, dtype) @staticmethod - def check_weights(model: torch.fx.GraphModule, ref_ids: list[int]) -> None: + def check_weights(model: torch.fx.GraphModule, ref_ids: list[int], transpose_a=False) -> None: all_names = list(model.graph.nodes) low_precision_nodes = list(map(lambda i: all_names[i].name, ref_ids)) for node in model.graph.nodes: @@ -610,9 +610,37 @@ def get_num_multiply_from_awq(model): return awq_num @staticmethod - def get_reference_for_test_awq_scale_reference() -> dict[str, Tensor]: + @pytest.fixture + def test_awq_scale_ref() -> dict[str, Tensor]: return { "linear_2": Tensor( - torch.tensor([[1.226455, 1.205499, 1.141340, 1.097436, 1.064355, 1.037971, 1.016118, 0.997526]]) - ) + torch.tensor([[1.226455, 1.205499, 1.141340, 1.097436, 1.064355, 1.037971, 1.016118, 0.997526]]).T + ), + "linear_1": Tensor( + torch.tensor( + [ + [ + [ + 1.9909899235, + 1.8632963896, + 1.5759800673, + 1.3974593878, + 1.2722752094, + 1.1779977083, + 1.1035580635, + 1.0427680016, + ] + ] + ], + dtype=torch.float32, + ) + ), } + + @staticmethod + def get_transposable_awq_model(transpose_a: bool, transpose_b: bool): + pass + + @pytest.fixture + def transpose_a_supported(self) -> bool: + return False diff --git a/tests/torch/test_node_utils.py b/tests/torch/test_node_utils.py new file mode 100644 index 00000000000..8e52f1c3050 --- /dev/null +++ b/tests/torch/test_node_utils.py @@ -0,0 +1,41 @@ +# Copyright (c) 2026 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import nncf +import nncf.torch.graph.operator_metatypes as op +from nncf.common.graph import NNCFNode +from nncf.torch.node_utils import get_activation_channel_axis + + +@pytest.mark.parametrize( + "metatype,port_id,ref_out", + ( + (op.PTLinearMetatype, 0, -1), + (op.PTConv2dMetatype, 0, 1), + (op.PTDepthwiseConv2dSubtype, 0, 1), + (op.PTConvTranspose2dMetatype, 0, 1), + (op.PTMatMulMetatype, 0, -1), + (op.PTMatMulMetatype, 1, -2), + (op.PTAddmmMetatype, 0, -1), + (op.PTAddmmMetatype, 1, -2), + (op.PTMatMulMetatype, 2, "error"), + (op.PTAddMetatype, 0, "error"), + ), +) +def test_get_activation_channel_axis(metatype, port_id, ref_out): + node = NNCFNode({"metatype": metatype}) + if ref_out == "error": + with pytest.raises(nncf.InternalError): + get_activation_channel_axis(node, port_id) + else: + assert get_activation_channel_axis(node, port_id) == ref_out