Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
from nncf.tensor import functions as fns


def process_stats(stats: WCTensorStatistic, subset_size: int, act_ch_axis: int = -1) -> tuple[Tensor, Tensor]:
def process_stats(
stats: WCTensorStatistic,
subset_size: int,
act_ch_axis: int = -1,
transpose_a: bool = False,
) -> tuple[Tensor, Tensor]:
"""
A function for processing activations. Shared between AWQ, Scale Estimation and LoRA Correction algorithms.

Expand All @@ -37,6 +42,11 @@ def process_stats(stats: WCTensorStatistic, subset_size: int, act_ch_axis: int =
axes = list(range(1, len(X.shape))) + [0]
X_full = fns.transpose(X, axes=axes)

if transpose_a:
axes = list(range(len(X_full.shape)))
axes[-1], axes[-2] = axes[-2], axes[-1]
X_full = fns.transpose(X_full, axes=axes)

# The sample dimension is always the last axis after transpose
sample_axis = -1

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def calculate_adapters(
layer_name = wc_params.node_with_weight.node_name
layer_statistics = self._statistics[layer_name]
is_debug = self._debug_interface is not None
transpose_a_flag = getattr(wc_params.node_with_weight, "transpose_a", False)
lora_A, lora_B, mean_noises = self.calculate_low_rank_matrices(
weight,
compressed_weight,
Expand All @@ -129,6 +130,7 @@ def calculate_adapters(
self._lora_correction_params,
layer_statistics,
is_debug,
transpose_a=transpose_a_flag,
)
if is_debug:
self._debug_interface.add_noises(layer_name, mean_noises)
Expand All @@ -143,6 +145,7 @@ def calculate_low_rank_matrices(
lora_correction_params: AdvancedLoraCorrectionParameters,
layer_statistics: WCTensorStatistic,
is_debug: Optional[bool] = False,
transpose_a: bool = False,
):
"""
Calculates low rank matrices for a given original and compressed weights.
Expand Down Expand Up @@ -170,7 +173,15 @@ def calculate_low_rank_matrices(
)
mode = compression_config.mode
assert len(reduction_axes) == 1, "Assumed a single reduction axis"
reduction_axis = reduction_axes[0] if compression_config.group_size != -1 else -1

if compression_config.group_size != -1:
reduction_axis = reduction_axes[0]
else:
reduction_axis = -1

if transpose_a and reduction_axis != -1:
reduction_axis = 1

if mode in (CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM):
fq_weights = do_integer_dequantization(
compressed_weight.tensor,
Expand All @@ -192,9 +203,8 @@ def calculate_low_rank_matrices(
# reduction axes is all axes except output dimension in linear/conv layers.
if reduction_axes[0] == 1:
svd_residual = fns.transpose(svd_residual)
residual = svd_residual.clone() # [H, O]

s, X = process_stats(layer_statistics, subset_size) # [H], [H, SS]
residual = fns.transpose(svd_residual) if transpose_a else svd_residual # [H, O] or [O, H]
s, X = process_stats(layer_statistics, subset_size, act_ch_axis=-1, transpose_a=transpose_a)
X = fns.transpose(X) # [SS, H]
if compression_config.group_size > 0:
# Multiply residual of weights by maximum channel magnitude of activations normalized per quantization
Expand Down
78 changes: 78 additions & 0 deletions tests/openvino/native/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from nncf import SensitivityMetric
from nncf.common.factory import build_graph
from nncf.common.tensor_statistics.collectors import AggregatorBase
from nncf.common.tensor_statistics.statistics import WCTensorStatistic
from nncf.common.utils.debug import nncf_debug
from nncf.common.utils.helpers import set_env_variable
from nncf.data.dataset import Dataset
Expand All @@ -42,6 +43,7 @@
from nncf.quantization.advanced_parameters import AdvancedGPTQParameters as GPTQParams
from nncf.quantization.advanced_parameters import AdvancedLoraCorrectionParameters as LoraParams
from nncf.quantization.advanced_parameters import GroupSizeFallbackMode
from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
from nncf.quantization.algorithms.weight_compression.mixed_precision import MIXED_PRECISION_CRITERIA
Expand Down Expand Up @@ -2574,3 +2576,79 @@ def test_awq_scale_ref() -> list[dict[str, Tensor]]:
@pytest.fixture
def transpose_a_supported(self) -> bool:
    """Fixture flag: this test class exercises backends where transposed
    activations (``transpose_a``) are supported, so it always returns True."""
    return True


def test_process_stats_with_transpose_a_changes_layout():
    """Check that ``process_stats`` with ``transpose_a=True`` alters the
    activation layout while keeping rank, the reduction (seq_len) dimension
    and the total element count unchanged."""
    acts = np.random.randn(10, 3, 8)

    statistic = WCTensorStatistic(
        Tensor(acts),
        shape_values=acts.shape,
    )

    n_samples = 10

    s_plain, x_plain = process_stats(
        statistic,
        subset_size=n_samples,
        act_ch_axis=-1,
        transpose_a=False,
    )

    s_swapped, x_swapped = process_stats(
        statistic,
        subset_size=n_samples,
        act_ch_axis=-1,
        transpose_a=True,
    )

    # The transpose must not change the rank of the per-channel statistic.
    assert len(s_plain.shape) == len(s_swapped.shape)

    # The reduction dimension (seq_len == 3) is preserved in both layouts.
    assert s_plain.shape[0] == 3
    assert s_swapped.shape[0] == 3

    # The activation layout itself must differ ...
    assert x_plain.shape != x_swapped.shape

    # ... while no elements are gained or lost.
    assert np.prod(x_plain.shape) == np.prod(x_swapped.shape)


@pytest.mark.parametrize(
"transpose_a,transpose_b",
[
(False, False),
(False, True),
],
)
def test_lora_transpose_a_fix(transpose_a, transpose_b):
"""
Test LoRA correction insertion only with transpose_a=False
because transposed activations are not yet supported by LoRA.
"""
# Setup LoRA parameters
params = LoraParams(adapter_rank=4, use_int8_adapters=False)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that makes sense. I can update the existing tests to cover the act_ch_axis/transpose handling instead of adding separate ones, so the verification of LoRA Correction with transposed inputs is integrated with the current test suite.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't forget to update the tests

advanced_parameters = CompressionParams(lora_correction_params=params)

# Initialize model with given transpose configuration
model = LMLinearModel(transpose_b=transpose_b, transpose_a=transpose_a)
ov_model = model.ov_model

# Use dummy dataset with same shape as model input
dataset = Dataset(np.ones(inp.shape) for inp in ov_model.inputs)

# Compress weights with LoRA correction enabled
compressed_model = compress_weights(
ov_model,
mode=CompressWeightsMode.INT4_SYM,
ratio=1.0,
group_size=8,
dataset=dataset,
all_layers=True,
lora_correction=True,
advanced_parameters=advanced_parameters,
)

# Simple assertion: compressed model is returned
assert compressed_model is not None