Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions src/nncf/common/tensor_statistics/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,21 +270,16 @@ def __eq__(self, other: Any) -> bool:
shapes_equal = all(self.shape_values[i] == other.shape_values[i] for i in range(len(self.mean_values)))
if not shapes_equal:
return False
mean_values_equal = all(
fns.allclose(self.mean_values[i], other.mean_values[i]) for i in range(len(self.mean_values))
)
return mean_values_equal
return all(fns.allclose(self.mean_values[i], other.mean_values[i]) for i in range(len(self.mean_values)))

def _get_serialized_data(self) -> dict[str, Tensor]:
backend = self.mean_values[0].backend
device = self.mean_values[0].device
return {
self.MEAN_STAT: fns.stack(self.mean_values),
self.SHAPE_STAT: fns.tensor(
self.shape_values,
backend=backend,
backend=self.mean_values[0].backend,
dtype=TensorDataType.int32,
device=device,
device=self.mean_values[0].device,
),
Comment on lines 275 to 283
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These refactoring changes to inline variable usage are unrelated to the PR's stated goal of enabling transpose_a support for LoRA Correction. While the refactoring is a reasonable style improvement, it should ideally be in a separate commit or PR to keep changes focused and easier to review. Including unrelated refactoring makes it harder to understand the core changes and could complicate any future bisecting or reverting.

Copilot uses AI. Check for mistakes.
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1152,11 +1152,6 @@ def apply_with_parameters(
)

if self._lora_correction:
for wc_params in all_weight_params:
if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, graph):
msg = "Transposed activations are not supported yet for the LoRa correction algorithm"
raise nncf.UnsupportedModelError(msg)

lora_correction_params = self._advanced_parameters.lora_correction_params
lora_correction_algo = LoraCorrectionAlgorithm(statistics, lora_correction_params)
description += " with correction of low-rank adapters"
Expand Down Expand Up @@ -1370,7 +1365,7 @@ def _get_statistics_for_weights_compression(
# Where mean_value is a 1D tensor representing an activation reduced over batch and sequence length dimensions,
# shape is an original shape of an activation before reduction, n is the size of the dataset (or subset_size).
statistics = {}
for (act_node, output_port_id, _), matmul_nodes in matmul_input_to_output_nodes_map.items():
for (act_node, output_port_id, _act_channel_axis), matmul_nodes in matmul_input_to_output_nodes_map.items():
tensor_collectors = list(
statistic_points.get_algo_statistics_for_node(
act_node.node_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,19 @@ def is_applicable(self, wc_params: WeightCompressionParameters):
return wc_params.compression_config.num_bits == 4

def calculate_adapters(
self, weight: Tensor, compressed_weight: CompressedWeight, wc_params: WeightCompressionParameters
self,
weight: Tensor,
compressed_weight: CompressedWeight,
wc_params: WeightCompressionParameters,
act_ch_axis: int,
) -> tuple[Tensor, Tensor, list[float]]:
"""
Calculates low rank matrices for a given original and compressed weights.

:param weight: original floating-point weight matrix.
:param compressed_weight: compressed weight matrix.
:param wc_params: parameters of weight compression.
:param act_ch_axis: axis number of the activation tensor which corresponds to its channel.
:return: two low rank matrices in the order of execution of corresponding linear layers.
"""
layer_name = wc_params.node_with_weight.node_name
Expand All @@ -128,6 +133,7 @@ def calculate_adapters(
wc_params.reduction_axes,
self._lora_correction_params,
layer_statistics,
act_ch_axis,
is_debug,
)
if is_debug:
Expand All @@ -142,6 +148,7 @@ def calculate_low_rank_matrices(
reduction_axes: tuple[int, ...],
lora_correction_params: AdvancedLoraCorrectionParameters,
layer_statistics: WCTensorStatistic,
act_ch_axis: int,
is_debug: Optional[bool] = False,
):
"""
Expand All @@ -157,6 +164,7 @@ def calculate_low_rank_matrices(
:param reduction_axes: axes along which different statistics reduced.
:param lora_correction_params: parameters to configure the algorithm.
:param layer_statistics: an object containing statistics for the layer.
:param act_ch_axis: axis number of the activation tensor which corresponds to its channel.
:param is_debug: whether to collect debug information, defaults to False.
:return: two low rank matrices in the order of execution of corresponding linear layers and list of mean noises.
Noises are collected from each step of the algorithm if debug was enabled.
Expand Down Expand Up @@ -194,8 +202,13 @@ def calculate_low_rank_matrices(
svd_residual = fns.transpose(svd_residual)
residual = svd_residual.clone() # [H, O]

s, X = process_stats(layer_statistics, subset_size) # [H], [H, SS]
X = fns.transpose(X) # [SS, H]
# Forward the activation channel axis so process_stats reduces over the correct dimension
s, X = process_stats(layer_statistics, subset_size, act_ch_axis)

# Conditionally transpose X so samples are rows and channels are columns
if act_ch_axis != 0: # if channel is not already the first axis
X = fns.transpose(X, axes=(1, 0)) # [SS, H]
Comment on lines +208 to +210
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conditional transpose logic appears incorrect. The process_stats function always returns X with shape [HiddenDim, SampleSize] (as documented in its docstring line 29), regardless of the act_ch_axis value. The act_ch_axis parameter is only used within process_stats for sampling logic, not for determining the output layout. Therefore, this conditional check if act_ch_axis != 0 doesn't achieve the intended purpose, and the transpose should either always be applied or never be applied. The expected shape after this line should be [SS, H] based on the comment, which means the transpose should always happen since process_stats returns [H, SS].

Suggested change
# Conditionally transpose X so samples are rows and channels are columns
if act_ch_axis != 0: # if channel is not already the first axis
X = fns.transpose(X, axes=(1, 0)) # [SS, H]
# Transpose X so samples are rows and channels are columns.
# process_stats returns X with shape [H, SS], so we convert to [SS, H].
X = fns.transpose(X, axes=(1, 0)) # [SS, H]

Copilot uses AI. Check for mistakes.

if compression_config.group_size > 0:
# Multiply residual of weights by maximum channel magnitude of activations normalized per quantization
# group. As a consequence, weights corresponding to a "noisy" activations has a higher error to correct.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ def insert_adapters(
A_W = opset.constant(lora_A.data)
B_W = opset.constant(lora_B.data)

A_MM = opset.matmul(input_node, A_W, transpose_a=False, transpose_b=True)
transpose_a = wc_params.node_with_weight.layer_attributes.input_attributes["transpose"]
A_MM = opset.matmul(input_node, A_W, transpose_a=transpose_a, transpose_b=True)
B_MM = opset.matmul(A_MM, B_W, transpose_a=False, transpose_b=True)

node_output_port = mm_node.output(0)
Expand Down Expand Up @@ -349,7 +350,15 @@ def transform_model(
compressed_weight.tensor = compressed_weight.tensor.as_numpy_tensor()
if compressed_weight.zero_point is not None:
compressed_weight.zero_point = compressed_weight.zero_point.as_numpy_tensor()
adapters = lora_correction_algo.calculate_adapters(weight, compressed_weight, wc_params)

activation_port_id = self.get_activation_port_id(wc_params.node_with_weight, graph)
activation_edge = graph.get_input_edge_by_port_id(wc_params.node_with_weight, activation_port_id)
activation_shape = activation_edge.tensor_shape
act_ch_axis = self.get_activation_channel_axis(
wc_params.node_with_weight, activation_port_id, activation_shape
)

adapters = lora_correction_algo.calculate_adapters(weight, compressed_weight, wc_params, act_ch_axis)
self.insert_adapters(wc_params, *adapters, int8_lora=lora_correction_algo.use_int8_adapters)
self.name_to_node_mapping = None

Expand Down
59 changes: 54 additions & 5 deletions tests/openvino/native/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -1612,12 +1612,25 @@ def test_call_max_var_criterion_with_dataset_gptq_neg_group_size(mode):


@pytest.mark.parametrize(
"params, transpose_b",
((None, True), (LoraParams(adapter_rank=4, use_int8_adapters=False), False)),
"params, transpose_a, transpose_b",
(
(None, False, True), # original
(LoraParams(adapter_rank=4, use_int8_adapters=False), False, False), # original
pytest.param(
LoraParams(adapter_rank=4, use_int8_adapters=False),
True,
False,
),
pytest.param(
LoraParams(adapter_rank=8, use_int8_adapters=True),
True,
True,
),
),
)
def test_lora_adapters_in_the_graph(params, transpose_b):
def test_lora_adapters_in_the_graph(params, transpose_a, transpose_b):
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description mentions two new tests (test_process_stats_with_transpose_a_changes_layout and test_lora_transpose_a_fix) that are not present in the diff. These tests are important to verify that the transpose_a support is working correctly. Either the tests were not included in this PR, or the PR description needs to be updated to reflect the actual tests that were added.

Copilot uses AI. Check for mistakes.
advanced_parameters = CompressionParams() if params is None else CompressionParams(lora_correction_params=params)
model = LMLinearModel(transpose_b=transpose_b)
model = LMLinearModel(transpose_a=transpose_a, transpose_b=transpose_b)
ov_model = model.ov_model
dataset = Dataset(np.ones(inp.shape) for inp in ov_model.inputs)

Expand Down Expand Up @@ -2410,7 +2423,7 @@ def test_scale_estimation(self, mocker, is_moe, check_sampling_activation_stats_
def test_awq_with_ignored_scope(self, mocker, is_3d_weights):
return super().test_awq_with_ignored_scope(mocker, is_3d_weights)

# Transpose inputs does not affect mergable pattern code, skippting (True, False)
# Transpose inputs does not affect mergable pattern code
@pytest.mark.parametrize("transpose_a,non_mergable_pattern", [(True, True), (False, True), (False, False)])
@pytest.mark.parametrize(
"is_3d_weights", [False, pytest.param(True, marks=pytest.mark.xfail(reason="Ticket - 176465"))]
Expand Down Expand Up @@ -2608,3 +2621,39 @@ def test_awq_scale_ref() -> list[dict[str, Tensor]]:
@pytest.fixture
def transpose_a_supported(self) -> bool:
return True

@pytest.mark.parametrize(
"kwargs",
[
dict(scale_estimation=True),
dict(
gptq=True,
advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
),
],
)
def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs):
if not transpose_a_supported:
pytest.skip("transpose_a is not supported for the current backend")
if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms():
pytest.skip("Scale estimation is not supported")
if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms():
pytest.skip("GPTQ is not supported")

INPUT_SHAPE = (2, 4)
model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
input = self.to_tensor(input)
dataset = Dataset([input] * 2, self.get_transform_func())

with pytest.raises(nncf.UnsupportedModelError):
compress_weights(
model,
mode=CompressWeightsMode.INT4_SYM,
ratio=1.0,
group_size=1,
subset_size=2,
dataset=dataset,
all_layers=True,
**kwargs,
)