-
Notifications
You must be signed in to change notification settings - Fork 288
Enable transpose_a support for LoRA Correction #3864
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
1c86647
86ee4d8
c6632b4
f6aa62b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -108,14 +108,19 @@ def is_applicable(self, wc_params: WeightCompressionParameters): | |||||||||||||
| return wc_params.compression_config.num_bits == 4 | ||||||||||||||
|
|
||||||||||||||
| def calculate_adapters( | ||||||||||||||
| self, weight: Tensor, compressed_weight: CompressedWeight, wc_params: WeightCompressionParameters | ||||||||||||||
| self, | ||||||||||||||
| weight: Tensor, | ||||||||||||||
| compressed_weight: CompressedWeight, | ||||||||||||||
| wc_params: WeightCompressionParameters, | ||||||||||||||
| act_ch_axis: int, | ||||||||||||||
| ) -> tuple[Tensor, Tensor, list[float]]: | ||||||||||||||
| """ | ||||||||||||||
| Calculates low-rank matrices for the given original and compressed weights. | ||||||||||||||
|
|
||||||||||||||
| :param weight: original floating-point weight matrix. | ||||||||||||||
| :param compressed_weight: compressed weight matrix. | ||||||||||||||
| :param wc_params: parameters of weight compression. | ||||||||||||||
| :param act_ch_axis: axis number of the activation tensor which corresponds to its channel. | ||||||||||||||
| :return: two low rank matrices in the order of execution of corresponding linear layers. | ||||||||||||||
| """ | ||||||||||||||
| layer_name = wc_params.node_with_weight.node_name | ||||||||||||||
|
|
@@ -128,6 +133,7 @@ def calculate_adapters( | |||||||||||||
| wc_params.reduction_axes, | ||||||||||||||
| self._lora_correction_params, | ||||||||||||||
| layer_statistics, | ||||||||||||||
| act_ch_axis, | ||||||||||||||
| is_debug, | ||||||||||||||
| ) | ||||||||||||||
| if is_debug: | ||||||||||||||
|
|
@@ -142,6 +148,7 @@ def calculate_low_rank_matrices( | |||||||||||||
| reduction_axes: tuple[int, ...], | ||||||||||||||
| lora_correction_params: AdvancedLoraCorrectionParameters, | ||||||||||||||
| layer_statistics: WCTensorStatistic, | ||||||||||||||
| act_ch_axis: int, | ||||||||||||||
| is_debug: Optional[bool] = False, | ||||||||||||||
| ): | ||||||||||||||
| """ | ||||||||||||||
|
|
@@ -157,6 +164,7 @@ def calculate_low_rank_matrices( | |||||||||||||
| :param reduction_axes: axes along which different statistics reduced. | ||||||||||||||
| :param lora_correction_params: parameters to configure the algorithm. | ||||||||||||||
| :param layer_statistics: an object containing statistics for the layer. | ||||||||||||||
| :param act_ch_axis: axis number of the activation tensor which corresponds to its channel. | ||||||||||||||
| :param is_debug: whether to collect debug information, defaults to False. | ||||||||||||||
| :return: two low rank matrices in the order of execution of corresponding linear layers and list of mean noises. | ||||||||||||||
| Noises are collected from each step of the algorithm if debug was enabled. | ||||||||||||||
|
|
@@ -194,8 +202,13 @@ def calculate_low_rank_matrices( | |||||||||||||
| svd_residual = fns.transpose(svd_residual) | ||||||||||||||
| residual = svd_residual.clone() # [H, O] | ||||||||||||||
|
|
||||||||||||||
| s, X = process_stats(layer_statistics, subset_size) # [H], [H, SS] | ||||||||||||||
| X = fns.transpose(X) # [SS, H] | ||||||||||||||
| # Pass it to process_stats | ||||||||||||||
| s, X = process_stats(layer_statistics, subset_size, act_ch_axis) | ||||||||||||||
|
|
||||||||||||||
| # Conditionally transpose X so samples are rows and channels are columns | ||||||||||||||
| if act_ch_axis != 0: # if channel is not already the first axis | ||||||||||||||
| X = fns.transpose(X, axes=(1, 0)) # [SS, H] | ||||||||||||||
|
Comment on lines
+208
to
+210
|
||||||||||||||
| # Conditionally transpose X so samples are rows and channels are columns | |
| if act_ch_axis != 0: # if channel is not already the first axis | |
| X = fns.transpose(X, axes=(1, 0)) # [SS, H] | |
| # Transpose X so samples are rows and channels are columns. | |
| # process_stats returns X with shape [H, SS], so we convert to [SS, H]. | |
| X = fns.transpose(X, axes=(1, 0)) # [SS, H] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1612,12 +1612,25 @@ def test_call_max_var_criterion_with_dataset_gptq_neg_group_size(mode): | |
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "params, transpose_b", | ||
| ((None, True), (LoraParams(adapter_rank=4, use_int8_adapters=False), False)), | ||
| "params, transpose_a, transpose_b", | ||
| ( | ||
| (None, False, True), # original | ||
| (LoraParams(adapter_rank=4, use_int8_adapters=False), False, False), # original | ||
| pytest.param( | ||
| LoraParams(adapter_rank=4, use_int8_adapters=False), | ||
| True, | ||
| False, | ||
| ), | ||
| pytest.param( | ||
| LoraParams(adapter_rank=8, use_int8_adapters=True), | ||
| True, | ||
| True, | ||
| ), | ||
| ), | ||
| ) | ||
| def test_lora_adapters_in_the_graph(params, transpose_b): | ||
| def test_lora_adapters_in_the_graph(params, transpose_a, transpose_b): | ||
|
||
| advanced_parameters = CompressionParams() if params is None else CompressionParams(lora_correction_params=params) | ||
| model = LMLinearModel(transpose_b=transpose_b) | ||
| model = LMLinearModel(transpose_a=transpose_a, transpose_b=transpose_b) | ||
| ov_model = model.ov_model | ||
| dataset = Dataset(np.ones(inp.shape) for inp in ov_model.inputs) | ||
|
|
||
|
|
@@ -2410,7 +2423,7 @@ def test_scale_estimation(self, mocker, is_moe, check_sampling_activation_stats_ | |
| def test_awq_with_ignored_scope(self, mocker, is_3d_weights): | ||
| return super().test_awq_with_ignored_scope(mocker, is_3d_weights) | ||
|
|
||
| # Transpose inputs does not affect mergeable pattern code, skipping (True, False) | ||
| # Transpose inputs does not affect mergeable pattern code | ||
| @pytest.mark.parametrize("transpose_a,non_mergable_pattern", [(True, True), (False, True), (False, False)]) | ||
| @pytest.mark.parametrize( | ||
| "is_3d_weights", [False, pytest.param(True, marks=pytest.mark.xfail(reason="Ticket - 176465"))] | ||
|
|
@@ -2608,3 +2621,39 @@ def test_awq_scale_ref() -> list[dict[str, Tensor]]: | |
| @pytest.fixture | ||
| def transpose_a_supported(self) -> bool: | ||
| return True | ||
|
|
||
| @pytest.mark.parametrize( | ||
| "kwargs", | ||
| [ | ||
| dict(scale_estimation=True), | ||
| dict( | ||
| gptq=True, | ||
| advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)), | ||
| ), | ||
| ], | ||
| ) | ||
| def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs): | ||
| if not transpose_a_supported: | ||
| pytest.skip("transpose_a is not supported for the current backend") | ||
| if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms(): | ||
| pytest.skip("Scale estimation is not supported") | ||
| if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms(): | ||
| pytest.skip("GPTQ is not supported") | ||
|
|
||
| INPUT_SHAPE = (2, 4) | ||
| model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE) | ||
| input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02 | ||
| input = self.to_tensor(input) | ||
| dataset = Dataset([input] * 2, self.get_transform_func()) | ||
|
|
||
| with pytest.raises(nncf.UnsupportedModelError): | ||
| compress_weights( | ||
| model, | ||
| mode=CompressWeightsMode.INT4_SYM, | ||
| ratio=1.0, | ||
| group_size=1, | ||
| subset_size=2, | ||
| dataset=dataset, | ||
| all_layers=True, | ||
| **kwargs, | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These refactoring changes to inline variable usage are unrelated to the PR's stated goal of enabling transpose_a support for LoRA Correction. While the refactoring is a reasonable style improvement, it should ideally be in a separate commit or PR to keep changes focused and easier to review. Including unrelated refactoring makes it harder to understand the core changes and could complicate any future bisecting or reverting.