Skip to content

Commit 57f4fca

Browse files
models with transpose_a failed with an error for unsupported algorithms
1 parent 64f40e0 commit 57f4fca

File tree

10 files changed

+85
-43
lines changed

10 files changed

+85
-43
lines changed

src/nncf/quantization/algorithms/weight_compression/algorithm.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,12 @@ def apply_with_parameters(
10871087
)
10881088

10891089
if self._lora_correction:
1090+
for wc_params in all_weight_params:
1091+
act_port_id = self._backend_entity.get_activation_port_id(wc_params.node_with_weight, graph)
1092+
if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, act_port_id):
1093+
msg = "Transposed activations are not supported yet for the LoRa correction algorithm"
1094+
raise nncf.UnsupportedModelError(msg)
1095+
10901096
lora_correction_params = self._advanced_parameters.lora_correction_params
10911097
lora_correction_algo = LoraCorrectionAlgorithm(statistics, lora_correction_params)
10921098
description += " with correction of low-rank adapters"

src/nncf/quantization/algorithms/weight_compression/backend.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,17 @@ def get_weight(self, node_with_weight: NNCFNode, weight_port_id: int, model: TMo
111111
:return: The weight tensor.
112112
"""
113113

114+
@abstractmethod
115+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
116+
"""
117+
Checks whether the activation input of a MatMul operation is transposed.
118+
119+
:param matmul: MatMul NNCFGraph node.
120+
:param act_port_id: Index of the input port corresponding to the activation tensor.
121+
:return: True if the node is a matmul node and activation input is transposed,
122+
False otherwise.
123+
"""
124+
114125
@abstractmethod
115126
def get_weight_dtype(
116127
self, node_with_weight: NNCFNode, weight_port_id: int, model: TModel, graph: NNCFGraph
@@ -279,6 +290,7 @@ def get_ignored_patterns() -> GraphPattern:
279290
def get_activation_channel_axis(node: NNCFNode, port_id: int, input_shape: tuple[int]) -> int:
280291
"""
281292
Returns axis number of the activation tensor which correspond to it channel.
293+
282294
:param node: NNCFNode instance.
283295
:param port_id: Port ID for input.
284296
:param input_shape: Shape of the input.

src/nncf/quantization/algorithms/weight_compression/gptq.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,12 @@ def apply(
124124
CompressWeightsMode.INT8_SYM,
125125
]:
126126
continue
127+
128+
act_port_id = self._backend_entity.get_activation_port_id(wc_params.node_with_weight, graph)
129+
if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, act_port_id):
130+
msg = "Transposed activations are not supported yet for the GPTQ algorithm"
131+
raise nncf.UnsupportedModelError(msg)
132+
127133
_, input_tensors = next(iter(inputs.items()))
128134
hessian = self._calculate_hessian(node, input_tensors)
129135
scale, zero_point = self._quantize_weights(model, graph, wc_params, hessian, input_tensors)

src/nncf/quantization/algorithms/weight_compression/onnx_backend.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,12 @@ def get_weight(
187187
weight_tensor = get_tensor_value(model, weight_name)
188188
return Tensor(weight_tensor)
189189

190+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
191+
if matmul.metatype != metatypes.ONNXGemmMetatype:
192+
return False
193+
trans_attr = "transB" if act_port_id else "transA"
194+
return matmul.layer_attributes.node_attrs[trans_attr]
195+
190196
def get_weight_dtype(
191197
self, node_with_weight: NNCFNode, weight_port_id: int, model: onnx.ModelProto, graph: NNCFGraph
192198
) -> TensorDataType:

src/nncf/quantization/algorithms/weight_compression/openvino_backend.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,16 @@ def get_weight_names_and_port_ids(node: NNCFNode, graph: NNCFGraph) -> list[tupl
135135
return result
136136

137137
def get_weight(self, node_with_weight: NNCFNode, weight_port_id: int, model: ov.Model, graph: NNCFGraph) -> Tensor:
138-
if not node_with_weight.layer_attributes.constant_attributes[weight_port_id]["transpose"]:
139-
msg = "Only transposed weights are supported"
140-
raise nncf.UnsupportedModelError(msg)
141138
weight_name = node_with_weight.layer_attributes.constant_attributes[weight_port_id]["name"]
142139
weight_node = self.name_to_node_mapping[weight_name]
143140
weight_tensor = get_const_value_as_numpy_tensor(weight_node)
144141
return Tensor(weight_tensor)
145142

143+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
144+
if matmul.metatype != om.OVMatMulMetatype:
145+
return False
146+
return matmul.layer_attributes.input_attributes["transpose"]
147+
146148
def get_weight_dtype(
147149
self, node_with_weight: NNCFNode, weight_port_id: int, model: ov.Model, graph: NNCFGraph
148150
) -> TensorDataType:
@@ -330,15 +332,6 @@ def transform_model(
330332
compression_format: CompressionFormat = CompressionFormat.DQ,
331333
advanced_parameters: Optional[AdvancedCompressionParameters] = None,
332334
) -> ov.Model:
333-
for wc_params in weight_compression_parameters:
334-
if (
335-
lora_correction_algo is not None
336-
and lora_correction_algo.is_applicable(wc_params)
337-
and wc_params.node_with_weight.layer_attributes.input_attributes["transpose"]
338-
):
339-
msg = "Transposed input for the LoRa correction is not supported"
340-
raise nncf.UnsupportedModelError(msg)
341-
342335
for wc_params in weight_compression_parameters:
343336
const_attributes = wc_params.node_with_weight.layer_attributes.constant_attributes[wc_params.weight_port_id]
344337
const_node_name = const_attributes["name"]

src/nncf/quantization/algorithms/weight_compression/scale_estimation.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ def apply(
139139
continue
140140
_, weight_port_id = weight_data[0]
141141

142+
act_port_id = self._backend_entity.get_activation_port_id(wp.node_with_weight, graph)
143+
if self._backend_entity.matmul_has_transposed_activations(wp.node_with_weight, act_port_id):
144+
msg = "Transposed activations are not supported yet for the Scale Estimation algorithm"
145+
raise nncf.UnsupportedModelError(msg)
142146
weight = self._backend_entity.get_weight(wp.node_with_weight, weight_port_id, model, graph)
143147

144148
scale, zero_point = self.calculate_quantization_params(

src/nncf/quantization/algorithms/weight_compression/torch_backend.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,9 @@ def get_weight(
177177
raise nncf.InternalError(msg)
178178
return Tensor(weight)
179179

180+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
181+
return False
182+
180183
def get_weight_dtype(
181184
self,
182185
node_with_weight: NNCFNode,

src/nncf/quantization/algorithms/weight_compression/torch_fx_backend.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ def get_weight(
128128

129129
return Tensor(weight)
130130

131+
def matmul_has_transposed_activations(self, matmul: NNCFNode, act_port_id: int) -> bool:
132+
return False
133+
131134
def get_weight_dtype(
132135
self, node_with_weight: NNCFNode, weight_port_id: int, model: torch.fx.GraphModule, graph: NNCFGraph
133136
) -> TensorDataType:

tests/cross_fw/test_templates/template_test_weights_compression.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from nncf.quantization import compress_weights
3232
from nncf.quantization.advanced_parameters import AdvancedAWQParameters as AWQParams
3333
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters as CompressionParams
34+
from nncf.quantization.advanced_parameters import AdvancedGPTQParameters as GPTQParams
3435
from nncf.quantization.algorithms.weight_compression.activation_stats import WCTensorStatistic
3536
from nncf.quantization.algorithms.weight_compression.activation_stats import process_stats
3637
from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression
@@ -779,3 +780,42 @@ def test_process_stats(self, case: ProcessStatsTestCase):
779780
@abstractmethod
780781
def get_transposable_awq_model(transpose_a: bool, transpose_b: bool, input_shape=None) -> TModel:
781782
"Returns a backend model for test_compression_with_transpose."
783+
784+
@pytest.mark.parametrize(
785+
"kwargs",
786+
[
787+
dict(scale_estimation=True),
788+
dict(lora_correction=True),
789+
dict(
790+
gptq=True,
791+
advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
792+
),
793+
],
794+
)
795+
def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs):
796+
if not transpose_a_supported:
797+
pytest.skip("transpose_a is not supported for the current backend")
798+
if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms():
799+
pytest.skip("Scale estimation is not supported")
800+
if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms():
801+
pytest.skip("GPTQ is not supported")
802+
if kwargs.get("lora_correction", False) and "lora_correction" in self.get_not_supported_algorithms():
803+
pytest.skip("lora_correction is not supported")
804+
805+
INPUT_SHAPE = (2, 4)
806+
model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
807+
input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
808+
input = self.to_tensor(input)
809+
dataset = Dataset([input] * 2, self.get_transform_func())
810+
811+
with pytest.raises(nncf.UnsupportedModelError):
812+
compress_weights(
813+
model,
814+
mode=CompressWeightsMode.INT4_SYM,
815+
ratio=1.0,
816+
group_size=1,
817+
subset_size=2,
818+
dataset=dataset,
819+
all_layers=True,
820+
**kwargs,
821+
)

tests/openvino/native/quantization/test_weights_compression.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1943,37 +1943,6 @@ def test_compression_with_different_algo_combinations(input_shape, kwargs):
19431943
)
19441944

19451945

1946-
@pytest.mark.parametrize(
1947-
"kwargs",
1948-
[
1949-
dict(scale_estimation=True),
1950-
dict(lora_correction=True),
1951-
dict(
1952-
gptq=True,
1953-
scale_estimation=True,
1954-
advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
1955-
),
1956-
],
1957-
)
1958-
def test_compression_with_transposed_activations(kwargs):
1959-
dataset_size = 4
1960-
model = LMLinearModel(transpose_a=True, transpose_b=False).ov_model
1961-
input_data = [np.ones(inp.shape) for inp in model.inputs] * dataset_size
1962-
dataset = Dataset(input_data)
1963-
1964-
with pytest.raises(nncf.UnsupportedModelError):
1965-
compress_weights(
1966-
model,
1967-
mode=CompressWeightsMode.INT4_SYM,
1968-
ratio=1.0,
1969-
group_size=8,
1970-
subset_size=2,
1971-
dataset=dataset,
1972-
all_layers=True,
1973-
**kwargs,
1974-
)
1975-
1976-
19771946
@pytest.mark.parametrize("disabled", [False, True])
19781947
def test_disabled_optimized_compression(disabled):
19791948
hidden_dim = (MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION // LMLinearModel.OUTPUT_DIM) + 1

0 commit comments

Comments
 (0)