2020from nncf .parameters import StripFormat
2121from nncf .torch .function_hook .hook_storage import decode_hook_name
2222from nncf .torch .function_hook .nncf_graph .nncf_graph_builder import build_nncf_graph
23- from nncf .torch .function_hook .pruning .strip import apply_pruning_in_place
23+ from nncf .torch .function_hook .pruning .magnitude .modules import UnstructuredPruningMask
24+ from nncf .torch .function_hook .pruning .rb .modules import RBPruningMask
2425from nncf .torch .function_hook .wrapper import get_hook_storage
2526from nncf .torch .model_graph_manager import get_const_data
2627from nncf .torch .model_graph_manager import get_const_node
@@ -57,7 +58,6 @@ def strip_model(model: TModel, example_input: Any = None, strip_format: StripFor
5758 elif strip_format == StripFormat .DQ :
5859 model = replace_quantizer_to_compressed_weight_with_decompressor (model )
5960 elif strip_format == StripFormat .IN_PLACE :
60- model = apply_pruning_in_place (model )
6161 model = apply_compression_in_place (model )
6262 else :
6363 msg = f"Unsupported strip format: { strip_format } "
@@ -109,6 +109,7 @@ def replace_quantizer_to_torch_native_module(model: TModel, graph: NNCFGraph) ->
109109 return model
110110
111111
112+ @torch .no_grad ()
112113def replace_quantizer_to_compressed_weight_with_decompressor (model : TModel ) -> TModel :
113114 """
114115 Performs transformation from fake quantize format (FQ) to dequantization one (DQ):
@@ -136,12 +137,11 @@ def replace_quantizer_to_compressed_weight_with_decompressor(model: TModel) -> T
136137 module = get_module_by_name (module_name , model )
137138 weight_param = getattr (module , weight_attr_name )
138139
139- with torch .no_grad ():
140- if isinstance (hook_module , AsymmetricQuantizer ):
141- decompressor , q_weight = asym_fq_to_decompressor (hook_module , weight_param )
142- else :
143- decompressor , q_weight = sym_fq_to_decompressor (hook_module , weight_param ) # type: ignore[assignment]
144- packed_tensor = decompressor .pack_weight (q_weight )
140+ if isinstance (hook_module , AsymmetricQuantizer ):
141+ decompressor , q_weight = asym_fq_to_decompressor (hook_module , weight_param )
142+ else :
143+ decompressor , q_weight = sym_fq_to_decompressor (hook_module , weight_param ) # type: ignore[assignment]
144+ packed_tensor = decompressor .pack_weight (q_weight )
145145
146146 weight_param .requires_grad = False
147147 weight_param .data = packed_tensor
@@ -150,40 +150,46 @@ def replace_quantizer_to_compressed_weight_with_decompressor(model: TModel) -> T
150150 return model
151151
152152
@torch.no_grad()
def apply_compression_in_place(model: TModel) -> TModel:
    """
    Applies NNCF module in-place to the weights:
    (weights + NNCF module) -> (in-place compressed weights)

    Each supported compression hook (pruning mask, fake quantizer, or weight
    decompressor) is baked directly into the weight parameter it targets and
    then removed from the hook storage.

    :param model: Compressed model
    :return: The modified NNCF network.
    """
    supported_hook_types = (
        RBPruningMask,
        UnstructuredPruningMask,
        SymmetricQuantizer,
        AsymmetricQuantizer,
        BaseWeightsDecompressor,
    )
    hook_storage = get_hook_storage(model)
    applied_hook_names = []
    for hook_name, hook_module in hook_storage.named_hooks():
        if not isinstance(hook_module, supported_hook_types):
            continue

        # Switch to inference behavior before baking the transformation in.
        hook_module.eval()

        hook_type, op_name, port_id = decode_hook_name(hook_name)
        # Compression modules are expected only as post-hooks on port 0 of the
        # weight-producing op; anything else indicates a malformed model.
        if hook_type != "post_hooks" or port_id != 0:
            msg = f"Unexpected place of Compression Module: {hook_type=}, {op_name=}, {port_id=}"
            raise nncf.InternalError(msg)

        module_name, weight_attr_name = split_const_name(op_name)
        owning_module = get_module_by_name(module_name, model)
        weight_param = getattr(owning_module, weight_attr_name)

        if not isinstance(weight_param, torch.nn.Parameter):
            msg = f"Expected torch.nn.Parameter under {op_name}, got {type(weight_param)}."
            raise nncf.InternalError(msg)

        weight_param.requires_grad = False
        # Quantizers expose an explicit quantize(); pruning masks and
        # decompressors apply their transformation when called.
        if isinstance(hook_module, (SymmetricQuantizer, AsymmetricQuantizer)):
            weight_param.data = hook_module.quantize(weight_param)
        else:
            weight_param.data = hook_module(weight_param)

        applied_hook_names.append(hook_name)

    # Delete after the walk to avoid mutating the storage while iterating it.
    for hook_name in applied_hook_names:
        hook_storage.delete_hook(hook_name)
    return model
0 commit comments