Skip to content

Commit 629f837

Browse files
committed
Merge branch 'main' into apply_quant_info_to_fusinginfo
2 parents 36d4bd2 + 5bfd07d commit 629f837

File tree

14 files changed

+452
-31
lines changed

14 files changed

+452
-31
lines changed

model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,15 @@ def _get_weights_configurable_quantizer_kwargs(self, n: BaseNode, attr: str) ->
143143
'max_candidate_idx': max_candidate_idx
144144
}
145145

146-
def mixed_precision_activation_holder(self, n: BaseNode) -> PytorchActivationQuantizationHolder:
146+
def mixed_precision_activation_holder(self, n: BaseNode, holder_type: PytorchActivationQuantizationHolder = PytorchActivationQuantizationHolder) -> PytorchActivationQuantizationHolder:
147147
"""
148148
Retrieve a PytorchActivationQuantizationHolder layer to use for activation quantization for a node.
149149
The layer should hold either a configurable activation quantizer, if it is quantized with mixed precision,
150150
or an inferable quantizer for fixed single bit-width quantization.
151151
152152
Args:
153153
n: Node to get PytorchActivationQuantizationHolder to attach in its output.
154+
holder_type: The type of the activation quantization holder to use.
154155
155156
Returns:
156157
A PytorchActivationQuantizationHolder layer for the node activation quantization.
@@ -192,7 +193,7 @@ def mixed_precision_activation_holder(self, n: BaseNode) -> PytorchActivationQua
192193
# thus we make sure this is the only possible case (unless it's a node with no activation
193194
# quantization, which in this case has an empty list).
194195
if len(activation_quantizers) == 1:
195-
return PytorchActivationQuantizationHolder(activation_quantizers[0])
196+
return holder_type(activation_quantizers[0])
196197

197198
Logger.critical(f"PytorchActivationQuantizationHolder expects a single quantizer, but ({len(activation_quantizers)}) quantizers were found for node {n}.")# pragma: no cover
198199

model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
1+
# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@
3434
from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder
3535
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
3636
from mct_quantizers.common.constants import ACTIVATION_HOLDER_QUANTIZER
37-
from mct_quantizers import PytorchQuantizationWrapper
37+
from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder, PytorchPreservingActivationQuantizationHolder
3838

3939

4040
def _build_input_tensors_list(node: BaseNode,
@@ -332,13 +332,21 @@ def _add_modules(self, reused_nodes_only=False):
332332
else:
333333
self.add_module(node.name, node_op)
334334

335-
# Add activation quantization modules if an activation holder is configured for this node
336-
if node.is_activation_quantization_enabled() and self.get_activation_quantizer_holder is not None:
337-
activation_quantizer_holder = self.get_activation_quantizer_holder(node)
338-
if activation_quantizer_holder is not None:
339-
self.add_module(node.name + '_' + ACTIVATION_HOLDER_QUANTIZER, activation_quantizer_holder)
340-
self.node_to_activation_quantization_holder.update(
341-
{node.name: node.name + '_' + ACTIVATION_HOLDER_QUANTIZER})
335+
activation_quantizer_holder = None
336+
if self.use_activation_holder_during_model_building:
337+
if node.is_activation_quantization_enabled():
338+
activation_quantizer_holder = self.get_activation_quantizer_holder(node, holder_type=PytorchActivationQuantizationHolder)
339+
340+
elif node.is_quantization_preserving():
341+
prev_node = self.graph.retrieve_preserved_quantization_node(node)
342+
if prev_node.is_activation_quantization_enabled():
343+
activation_quantizer_holder = self.get_activation_quantizer_holder(prev_node, holder_type=PytorchPreservingActivationQuantizationHolder)
344+
345+
if activation_quantizer_holder is not None:
346+
activation_quantizer_holder_name = node.name + '_' + ACTIVATION_HOLDER_QUANTIZER
347+
self.add_module(activation_quantizer_holder_name, activation_quantizer_holder)
348+
self.node_to_activation_quantization_holder.update(
349+
{node.name: activation_quantizer_holder_name})
342350

343351
def forward(self,
344352
*args: Any) -> Any:

model_compression_toolkit/core/pytorch/reader/reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from model_compression_toolkit.core.common import Graph
2424
from model_compression_toolkit.core.pytorch.reader.graph_builders import edges_builder, nodes_builder
2525
from model_compression_toolkit.core.pytorch.utils import set_model
26-
from sony_custom_layers.pytorch import CustomLayer
26+
from edgemdt_cl.pytorch import CustomLayer
2727

2828

2929
def _trace_model(root: Union[torch.nn.Module, Callable[..., Any]]) -> GraphModule:

model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,7 @@ def export(self, output_names=None) -> None:
100100
model_output = self.model(*model_input) if isinstance(model_input, (list, tuple)) else self.model(
101101
model_input)
102102

103-
input_nodes = [n for n in self.model.node_sort if n.type == DummyPlaceHolder]
104-
input_names = [f"input_{i}" for i in range(len(input_nodes))] if len(input_nodes) > 1 else ["input"]
103+
input_names = [f"input_{i}" for i in range(len(model_input))] if len(model_input) > 1 else ["input"]
105104
dynamic_axes = {name: {0: 'batch_size'} for name in input_names}
106105
if output_names is None:
107106
# Determine number of outputs and prepare output_names and dynamic_axes

model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
1+
# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@
2323

2424
if FOUND_TORCH:
2525
import torch
26-
from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
26+
from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder, PytorchPreservingActivationQuantizationHolder
2727
from mct_quantizers.common.constants import OP_CALL_ARGS, OP_CALL_KWARGS
2828
from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
2929
from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
@@ -65,22 +65,26 @@ def fully_quantized_wrapper(node: common.BaseNode,
6565
return module
6666

6767

68-
def get_activation_quantizer_holder(node: BaseNode, fw_impl) -> Callable:
68+
def get_activation_quantizer_holder(node: BaseNode, holder_type: PytorchActivationQuantizationHolder, fw_impl) -> Callable:
6969
"""
7070
Retrieve a PytorchActivationQuantizationHolder layer to use for activation quantization of a node.
7171
If the layer is not supposed to be wrapped with an activation quantizer - return None.
7272
Args:
7373
node: Node to attach a PytorchActivationQuantizationHolder to its output.
74+
holder_type: The type of the activation quantization holder to use.
7475
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
7576
Returns:
7677
A PytorchActivationQuantizationHolder module for the node's activation quantization.
7778
"""
78-
_, activation_quantizers = fw_impl.get_inferable_quantizers(node)
7979
# Holder by definition uses a single quantizer for the activation quantization
8080
# thus we make sure this is the only possible case (unless it's a node with no activation
8181
# quantization, which in this case has an empty list).
82+
_, activation_quantizers = fw_impl.get_inferable_quantizers(node)
8283
if len(activation_quantizers) == 1:
83-
return PytorchActivationQuantizationHolder(activation_quantizers[0])
84+
if holder_type == PytorchActivationQuantizationHolder:
85+
return holder_type(activation_quantizers[0])
86+
elif holder_type == PytorchPreservingActivationQuantizationHolder:
87+
return holder_type(activation_quantizers[0], quantization_bypass=True)
8488
Logger.critical(
8589
f'PytorchActivationQuantizationHolder supports a single quantizer but {len(activation_quantizers)} quantizers '
8690
f'were found for node {node}')
@@ -96,13 +100,14 @@ def get_exportable_pytorch_model(graph: Graph):
96100
Returns:
97101
Fully quantized PyTorch model.
98102
"""
103+
fw_impl = C.pytorch.pytorch_implementation.PytorchImplementation()
99104
exportable_model, user_info = PyTorchModelBuilder(graph=graph,
100105
wrapper=lambda n, m:
101106
fully_quantized_wrapper(n, m,
102-
fw_impl=C.pytorch.pytorch_implementation.PytorchImplementation()),
103-
get_activation_quantizer_holder_fn=lambda n:
104-
get_activation_quantizer_holder(n,
105-
fw_impl=C.pytorch.pytorch_implementation.PytorchImplementation())).build_model()
107+
fw_impl=fw_impl),
108+
get_activation_quantizer_holder_fn=lambda n, holder_type:
109+
get_activation_quantizer_holder(n, holder_type,
110+
fw_impl=fw_impl)).build_model()
106111

107112
Logger.info("\nPlease run your accuracy evaluation on the exported quantized model to verify its accuracy.\n"
108113
"Checkout the FAQ and Troubleshooting pages for resolving common issues and improving the quantized model accuracy:\n"

model_compression_toolkit/gptq/pytorch/gptq_training.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,12 @@ def gptq_wrapper(self,
197197
# quantized, do we need to wrap them as well?
198198
return layer
199199

200-
def get_activation_quantizer_holder(self, n: BaseNode) -> Callable:
200+
def get_activation_quantizer_holder(self, n: BaseNode, holder_type: PytorchActivationQuantizationHolder = PytorchActivationQuantizationHolder) -> Callable:
201201
"""
202202
Retrieve a PytorchActivationQuantizationHolder layer to use for activation quantization of a node.
203203
Args:
204204
n: Node to attach a PytorchActivationQuantizationHolder to its output.
205+
holder_type: The type of the activation quantization holder to use.
205206
Returns:
206207
A PytorchActivationQuantizationHolder module for the node's activation quantization.
207208
"""
@@ -213,7 +214,7 @@ def get_activation_quantizer_holder(self, n: BaseNode) -> Callable:
213214
f"but {len(activation_quantizers)} were found for node {n.name}. "
214215
f"Ensure the node is configured with a single activation quantizer.")
215216
quantizer = self.gradual_act_quantizer_wrapper_factory(activation_quantizers[0])
216-
return PytorchActivationQuantizationHolder(quantizer)
217+
return holder_type(quantizer)
217218

218219
def build_gptq_model(self):
219220
"""

model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,15 @@
3030

3131

3232
def get_activation_quantizer_holder(n: common.BaseNode,
33-
qat_config: QATConfig) -> Callable:
33+
qat_config: QATConfig, holder_type: PytorchActivationQuantizationHolder = PytorchActivationQuantizationHolder) -> Callable:
3434
"""
3535
Retrieve a ActivationQuantizationHolder layer to use for activation quantization for a node.
3636
If the layer is not supposed to be wrapped with activation quantizers - return None.
3737
3838
Args:
3939
n: Node for which to retrieve an ActivationQuantizationHolder to attach to its output.
4040
qat_config: QAT configuration (for example, training methods).
41+
holder_type: The type of the activation quantization holder to use.
4142
4243
Returns:
4344
A ActivationQuantizationHolder layer for the node's activation quantization.
@@ -49,7 +50,7 @@ def get_activation_quantizer_holder(n: common.BaseNode,
4950
# thus we make sure this is the only possible case (unless it's a node with no activation
5051
# quantization, which in this case has an empty list).
5152
if len(activation_quantizers) == 1:
52-
return PytorchActivationQuantizationHolder(activation_quantizers[0])
53+
return holder_type(activation_quantizers[0])
5354
Logger.critical(f'ActivationQuantizationHolder supports only a single quantizer, but ({len(activation_quantizers)}) quantizers were found for node {n}.')
5455

5556

model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2keras.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2fw import \
2121
AttachTpcToFramework
2222

23-
from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess
23+
from edgemdt_cl.keras.object_detection.ssd_post_process import SSDPostProcess
2424

2525
if version.parse(tf.__version__) >= version.parse("2.13"):
2626
from keras.src.layers import Conv2D, DepthwiseConv2D, Dense, Reshape, ZeroPadding2D, Dropout, \

model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2pytorch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2fw import \
3333
AttachTpcToFramework
3434
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attribute_filter import Eq
35-
from sony_custom_layers.pytorch import MulticlassNMS, MulticlassNMSWithIndices
35+
from edgemdt_cl.pytorch import MulticlassNMS, MulticlassNMSWithIndices
3636

3737

3838
class AttachTpcToPytorch(AttachTpcToFramework):

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@ scipy
1111
protobuf
1212
mct-quantizers-nightly
1313
pydantic>=2.0
14-
sony-custom-layers-dev==0.4.0.dev6
14+
edge-mdt-cl-dev
1515

0 commit comments

Comments
 (0)