Skip to content

Commit 61fc34a

Browse files
authored
[Torch] In-place strip for WeightDecompressor classes (#3709)
### Changes Extended in-place strip for CompressionFormat.DQ ### Reason for changes Faster evaluation of compressed models in Torch ### Related tickets n/a ### Tests test_nncf_in_place_strip
1 parent 5d88dd6 commit 61fc34a

File tree

2 files changed

+17
-5
lines changed

2 files changed

+17
-5
lines changed

src/nncf/torch/function_hook/strip.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from nncf.torch.model_graph_manager import split_const_name
2828
from nncf.torch.quantization.layers import AsymmetricQuantizer
2929
from nncf.torch.quantization.layers import BaseQuantizer
30+
from nncf.torch.quantization.layers import BaseWeightsDecompressor
3031
from nncf.torch.quantization.layers import SymmetricQuantizer
3132
from nncf.torch.quantization.strip import asym_fq_to_decompressor
3233
from nncf.torch.quantization.strip import convert_to_torch_fakequantizer
@@ -167,7 +168,7 @@ def apply_compression_in_place(model: TModel, graph: NNCFGraph) -> TModel:
167168

168169
hooks_to_delete = []
169170
for name, hook in hook_storage.named_hooks():
170-
if not isinstance(hook, (SymmetricQuantizer, AsymmetricQuantizer)):
171+
if not isinstance(hook, (SymmetricQuantizer, AsymmetricQuantizer, BaseWeightsDecompressor)):
171172
continue
172173
_, op_name, _ = decode_hook_name(name)
173174
weight_node = graph.get_node_by_name(op_name)
@@ -181,7 +182,7 @@ def apply_compression_in_place(model: TModel, graph: NNCFGraph) -> TModel:
181182
raise nncf.InternalError(msg)
182183

183184
weight = get_const_data(weight_node, model)
184-
fq_weight = hook.quantize(weight)
185+
fq_weight = hook(weight) if isinstance(hook, BaseWeightsDecompressor) else hook.quantize(weight)
185186

186187
module_name, weight_attr_name = split_const_name(weight_node.layer_attributes.name)
187188
module = get_module_by_name(module_name, model)

tests/torch2/function_hook/quantization/strip/test_strip_in_place.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from nncf.parameters import StripFormat
2424
from nncf.torch.function_hook.wrapper import get_hook_storage
2525
from nncf.torch.quantization.layers import BaseQuantizer
26+
from nncf.torch.quantization.layers import BaseWeightsDecompressor
2627
from tests.torch.helpers import LinearModel
2728
from tests.torch2.function_hook.quantization.strip.test_strip_dequantize import check_compression_modules
2829

@@ -45,6 +46,16 @@ def extra_arguments(self) -> dict[str, Any]:
4546
args["group_size"] = -1
4647
return args
4748

49+
@property
50+
def compression_class(self) -> Any:
51+
return BaseWeightsDecompressor if self.compression_format == CompressionFormat.DQ else BaseQuantizer
52+
53+
@property
54+
def compression_dtype(self) -> Any:
55+
if self.compression_format == CompressionFormat.DQ:
56+
return torch.int8 if self.mode == CompressWeightsMode.INT8_SYM else torch.uint8
57+
return self.torch_dtype
58+
4859

4960
@pytest.mark.parametrize(
5061
"param",
@@ -57,7 +68,7 @@ def extra_arguments(self) -> dict[str, Any]:
5768
CompressWeightsMode.INT8_ASYM,
5869
CompressWeightsMode.INT8_SYM,
5970
],
60-
[CompressionFormat.FQ_LORA, CompressionFormat.FQ],
71+
[CompressionFormat.FQ_LORA, CompressionFormat.FQ, CompressionFormat.DQ],
6172
[torch.float32, torch.float16, torch.bfloat16],
6273
)
6374
],
@@ -77,8 +88,8 @@ def test_nncf_in_place_strip(param: ParamInPlaceStrip):
7788
**param.extra_arguments,
7889
)
7990

80-
check_compression_modules(compressed_model, expected_class=BaseQuantizer)
81-
assert compressed_model.linear.weight.dtype == param.torch_dtype
91+
check_compression_modules(compressed_model, expected_class=param.compression_class)
92+
assert compressed_model.linear.weight.dtype == param.compression_dtype
8293

8394
with torch.no_grad():
8495
compressed_output = compressed_model(example_input)

0 commit comments

Comments
 (0)