Commit 8188f2d: Add API to choose output_names when exporting using onnx

1 parent: 6e6350f
4 files changed: 61 additions & 9 deletions


model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py

Lines changed: 7 additions & 2 deletions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Callable
+from typing import Callable, Optional, List
 from io import BytesIO
 
 import torch.nn
@@ -64,11 +64,14 @@ def __init__(self,
         self._use_onnx_custom_quantizer_ops = use_onnx_custom_quantizer_ops
         self._onnx_opset_version = onnx_opset_version
 
-    def export(self, output_names=None) -> None:
+    def export(self, output_names: Optional[List[str]] = None) -> None:
         """
         Convert an exportable (fully-quantized) PyTorch model to a fakely-quant model
         (namely, weights that are in fake-quant format) and fake-quant layers for the activations.
 
+        Args:
+            output_names (Optional[List[str]]): Optional list of output node names for export compatibility.
+
         Returns:
             Fake-quant PyTorch model.
         """
@@ -130,6 +133,8 @@ def export(self, output_names=None) -> None:
             output_names = ['output']
             dynamic_axes.update({'output': {0: 'batch_size'}})
         else:
+            assert isinstance(output_names, list), \
+                f"`output_names` must be a list, but got {type(output_names).__name__}"
             if isinstance(model_output, (list, tuple)):
                 num_of_outputs = len(model_output)
             else:
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ def __init__(self,
                          save_model_path,
                          repr_dataset)
 
-    def export(self) -> None:
+    def export(self, output_names=None) -> None:
         """
         Convert an exportable (fully-quantized) PyTorch model to a fakely-quant model
         (namely, weights that are in fake-quant format) and fake-quant layers for the activations.
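
Note: the TorchScript exporter appears to accept `output_names` only for signature compatibility; the facade below now calls `exporter.export(output_names=output_names)` for every serialization format, and this exporter ignores the argument.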

model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py

Lines changed: 6 additions & 3 deletions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Callable
+from typing import Callable, Optional, List
 from packaging import version
 
 from model_compression_toolkit.verify_packages import FOUND_TORCH
@@ -49,7 +49,8 @@ def pytorch_export_model(model: torch.nn.Module,
                          is_layer_exportable_fn: Callable = is_pytorch_layer_exportable,
                          serialization_format: PytorchExportSerializationFormat = PytorchExportSerializationFormat.ONNX,
                          quantization_format: QuantizationFormat = QuantizationFormat.MCTQ,
-                         onnx_opset_version=DEFAULT_ONNX_OPSET_VERSION) -> None:
+                         onnx_opset_version=DEFAULT_ONNX_OPSET_VERSION,
+                         output_names: Optional[List[str]] = None) -> None:
     """
     Export a PyTorch quantized model to a torchscript or onnx model.
     The model will be saved to the path in save_model_path.
@@ -67,6 +68,8 @@ def pytorch_export_model(model: torch.nn.Module,
             PytorchExportSerializationFormat.ONNX).
         quantization_format: Format of how quantizers are exported (fakely-quant, int8, MCTQ quantizers).
         onnx_opset_version: ONNX opset version to use for exported ONNX model.
+        output_names (Optional[List[str]]): Optional list of output node names for export compatibility.
+            This argument is relevant only when using FakelyQuantONNXPyTorchExporter.
 
     """
     # Ensure 'metadata' is available directly on the model, if present in submodules
@@ -109,7 +112,7 @@ def pytorch_export_model(model: torch.nn.Module,
                          f'Unsupported serialization {serialization_format} was used to export Pytorch model.'
                          f' Please see API for supported formats.')  # pragma: no cover
 
-        exporter.export()
+        exporter.export(output_names=output_names)
 
 else:
     def pytorch_export_model(*args, **kwargs):
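
A hedged usage sketch of the new facade parameter, assuming the facade is exposed as `mct.exporter.pytorch_export_model` (MCT's public export entry point) and that `quantized_model` and `representative_dataset` already exist from a prior MCT quantization run:

# Usage sketch: exporting with a custom ONNX output name via the new parameter.
# `quantized_model` and `representative_dataset` are assumed to come from a
# prior MCT post-training-quantization flow; the save path is a placeholder.
import model_compression_toolkit as mct

mct.exporter.pytorch_export_model(
    model=quantized_model,
    save_model_path='./qmodel.onnx',
    repr_dataset=representative_dataset,
    serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX,
    output_names=['logits'])  # custom name for the single model output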

tests_pytest/pytorch_tests/e2e_tests/test_exporter.py

Lines changed: 47 additions & 3 deletions
@@ -199,14 +199,28 @@ def _run_mct_qat(self, float_model, rep_dataset, abits, a_qmethod):
         quantized_model = pytorch_quantization_aware_training_finalize_experimental(qat_ready_model)
         return quantized_model
 
-    def _run_exporter(self, quantized_model, rep_dataset, quantization_format):
+    def _run_exporter(self, quantized_model, rep_dataset, quantization_format, output_names=None):
         pytorch_export_model(quantized_model,
                              save_model_path=self.onnx_file,
                              repr_dataset=rep_dataset,
                              serialization_format=PytorchExportSerializationFormat.ONNX,
-                             quantization_format=quantization_format)
+                             quantization_format=quantization_format,
+                             output_names=output_names)
 
-        return onnx_reader(self.onnx_file, quantized_model.linear_activation_holder_quantizer.activation_holder_quantizer)
+        return onnx_reader(self.onnx_file,
+                           quantized_model.linear_activation_holder_quantizer.activation_holder_quantizer)
+
+    def _assert_outputs_names(self, output_names):
+        model = onnx.load(self.onnx_file)
+        exported_output_names = [output.name for output in model.graph.output]
+
+        if output_names is None:
+            if len(exported_output_names) == 1:
+                output_names = ['output']
+            else:
+                output_names = [f"output_{i}" for i in range(len(exported_output_names))]
+        assert all(name in exported_output_names for name in output_names)
+        assert len(output_names) == len(exported_output_names)
 
     def _assert_outputs_match(self, quantized_model, rep_dataset, quantization_format, tol=1e-8):
         pass
@@ -304,6 +318,17 @@ def test_mct_ptq_and_exporter_mctq(self, w_qmethod, abits, a_qmethod, tol):
         self._assert_quant_params_match(quantized_model, onnx_model_dict, a_qmethod, w_qmethod)
         self._assert_outputs_match(quantized_model, self.representative_dataset(1), QuantizationFormat.MCTQ, tol=tol)
 
+    @pytest.mark.parametrize('w_qmethod', [mctq.QuantizationMethod.POWER_OF_TWO])
+    @pytest.mark.parametrize('a_qmethod', [mctq.QuantizationMethod.SYMMETRIC])
+    @pytest.mark.parametrize('abits', [8, 16])
+    @pytest.mark.parametrize('output_names', [None, ['x']])
+    def test_mct_ptq_exporter_mctq_output_names(self, w_qmethod, abits, a_qmethod, output_names):
+        # set_seed(13)
+        quantized_model = self._run_mct(self.get_model(), self.representative_dataset(1), abits, a_qmethod, w_qmethod)
+        onnx_model_dict = self._run_exporter(quantized_model, self.representative_dataset(1), QuantizationFormat.MCTQ,
+                                             output_names=output_names)
+        self._assert_outputs_names(output_names=output_names)
+
     @pytest.mark.parametrize('abits, tol', ([8, 1e-4], [16, 1e-2]))
     def test_mct_ptq_and_exporter_fq(self, abits, tol):
         quantized_model = self._run_mct(self.get_model(), self.representative_dataset(1), abits, mctq.QuantizationMethod.POWER_OF_TWO)
@@ -363,6 +388,25 @@ def forward(self, x):
         self._run_exporter(quantized_model, self.representative_dataset(1), QuantizationFormat.MCTQ)
         self._assert_outputs_match(quantized_model, self.representative_dataset(1), QuantizationFormat.MCTQ)
 
+    @pytest.mark.parametrize('abits', [8, 16])
+    @pytest.mark.parametrize('output_names', [None, ['x', 'y']])
+    def test_multi_output_names_mct_and_exporter_mctq(self, abits, output_names):
+        class MultiOutputModel(torch.nn.Module):
+            def __init__(self, in_channels, out_channels):
+                super().__init__()
+                self.linear = torch.nn.Linear(in_channels, out_channels)
+                self.linear_y = torch.nn.Linear(in_channels, out_channels)
+
+            def forward(self, x):
+                return self.linear(x), self.linear_y(x)
+
+        quantized_model = self._run_mct(MultiOutputModel(self.in_channels, self.out_channels),
+                                        self.representative_dataset(1),
+                                        abits, mctq.QuantizationMethod.POWER_OF_TWO)
+        self._run_exporter(quantized_model, self.representative_dataset(1), QuantizationFormat.MCTQ,
+                           output_names=output_names)
+        self._assert_outputs_names(output_names=output_names)
+
     @pytest.mark.parametrize('abits', [8, 16])
     def test_multi_input_output_mct_and_exporter_mctq(self, abits):
         class MultiInputOutputModel(torch.nn.Module):
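
For reference, the naming behavior asserted by `_assert_outputs_names` can be checked directly with the `onnx` package. A small sketch ('qmodel.onnx' is a placeholder path):

# Sketch: inspect the output names of an exported model with the onnx package.
import onnx

onnx_model = onnx.load('qmodel.onnx')  # placeholder path
print([o.name for o in onnx_model.graph.output])
# With output_names=['x', 'y'] this prints ['x', 'y']; with output_names=None,
# the defaults are 'output' (single output) or 'output_0', 'output_1', ...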
