fix: embedding layer support in client server (#1149)

jfrery · web-flow · commit d4c1c8cbe35b · 2025-10-28T14:49:27.000+01:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -153,6 +153,10 @@ filterwarnings = [
     "ignore:`np\\.object` is a deprecated alias for the builtin `object`\\. To silence this warning, use `object` by itself\\. Doing this will not modify any behavior and is safe\\.:DeprecationWarning",
     "ignore:Using or importing the ABCs from 'collections' instead of from 'collections\\.abc' is deprecated.*:DeprecationWarning",
     "ignore: distutils Version classes are deprecated. Use packaging\\.version instead.*:DeprecationWarning",
+    "ignore:The distutils package is deprecated and slated for removal in Python 3\\.12.*:DeprecationWarning",
+    "ignore:Distutils was imported before Setuptools.*:UserWarning",
+    "ignore:Setuptools is replacing distutils.*:UserWarning",
+    "ignore:The distutils\\.sysconfig module is deprecated.*:DeprecationWarning",
     "ignore: forcing n_jobs = 1 on mac for segfault issue",
     "ignore: allowzero=0 by default.*:UserWarning",
     "ignore:Implicitly cleaning up:ResourceWarning",
diff --git a/src/concrete/ml/deployment/fhe_client_server.py b/src/concrete/ml/deployment/fhe_client_server.py
@@ -18,6 +18,7 @@
 from ..common.serialization.loaders import load
 from ..common.utils import CiphertextFormat, to_tuple
 from ..quantization import QuantizedModule
+from ..torch.numpy_module import NumpyModule
 from ..version import __version__ as CML_VERSION
 from ._utils import deserialize_encrypted_values, serialize_encrypted_values
 
@@ -270,8 +271,13 @@ def _export_model_to_json(self, is_training: bool = False) -> Path:
             "output_quantizers": module_to_export.output_quantizers,
             "is_training": is_training,
             "ciphertext_format": module_to_export.ciphertext_format,
+            "onnx_preprocessing": None,
         }
 
+        preprocessing_module = getattr(module_to_export, "_preprocessing_module", None)
+        if preprocessing_module is not None:
+            serialized_processing["onnx_preprocessing"] = preprocessing_module.onnx_model
+
         # Export the `is_fitted` attribute for built-in models
         if hasattr(self.model, "is_fitted"):
             serialized_processing["is_fitted"] = self.model.is_fitted
@@ -416,6 +422,13 @@ def load(self):  # pylint: disable=no-value-for-parameter
 
         self.model.ciphertext_format = serialized_processing["ciphertext_format"]
 
+        if hasattr(self.model, "_preprocessing_module"):
+            onnx_preprocessing = serialized_processing.get("onnx_preprocessing")
+            # pylint: disable-next=protected-access
+            self.model._preprocessing_module = (
+                NumpyModule(onnx_preprocessing) if onnx_preprocessing is not None else None
+            )
+
         # Load model parameters
         # Add some checks on post-processing-params
         # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3131
@@ -487,6 +500,14 @@ def quantize_encrypt_serialize(
             Union[bytes, Tuple[bytes, ...]]: The quantized, encrypted and serialized values.
         """
 
+        # Apply the same preprocessing as during standard forward passes when available so that
+        # inputs expected by the FHE circuit (e.g., one-hot vectors for optimized embeddings) are
+        # generated here too.
+        if hasattr(self.model, "pre_processing"):
+            x = to_tuple(self.model.pre_processing(*x))
+        else:
+            x = to_tuple(x)
+
         # Quantize the values
         x_quant = to_tuple(self.model.quantize_input(*x))
 
diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py
@@ -10,6 +10,7 @@
 
 import numpy
 import pytest
+import torch
 from torch import nn
 
 from concrete import fhe
@@ -20,7 +21,7 @@
     FHEModelDev,
     FHEModelServer,
 )
-from concrete.ml.pytest.torch_models import FCSmall
+from concrete.ml.pytest.torch_models import EmbeddingModel, FCSmall
 from concrete.ml.pytest.utils import (
     MODELS_AND_DATASETS,
     _get_sklearn_tree_models,
@@ -283,6 +284,51 @@ def test_client_server_custom_model(
     )
 
 
+def test_client_server_torch_embedding_model(default_configuration):
+    """Ensure client/server flow works for a torch model containing an embedding."""
+    # Fix torch's RNG seed so the random inputset below is reproducible between runs.
+    torch.manual_seed(0)
+    num_embeddings = 6
+    embedding_dim = 3
+    seq_len = 2
+    # Build the embedding test model and put it in evaluation mode before compiling it.
+    torch_model = EmbeddingModel(num_embeddings, embedding_dim)
+    torch_model.eval()
+    # Inputs are integer indices in [0, num_embeddings); the first row is the client's sample.
+    torch_inputset = torch.randint(0, num_embeddings, size=(8, seq_len)).long()
+    sample = torch_inputset[:1].numpy()
+    # Compile the torch model with the inputset, using 2 bits for n_bits and rounding threshold.
+    quantized_module = compile_torch_model(
+        torch_model,
+        torch_inputset,
+        configuration=default_configuration,
+        n_bits=2,
+        rounding_threshold_bits=2,
+    )
+    # Save the dev artifacts, then have the on-disk network helper send them to client/server.
+    network = OnDiskNetwork()
+    fhe_model_dev = FHEModelDev(path_dir=network.dev_dir.name, model=quantized_module)
+    fhe_model_dev.save()
+    network.dev_send_clientspecs_and_modelspecs_to_client()
+    network.dev_send_model_to_server()
+    # Client: generate keys and get the serialized evaluation keys. Server: built from its dir.
+    key_dir = default_configuration.insecure_key_cache_location
+    fhe_model_client = FHEModelClient(path_dir=network.client_dir.name, key_dir=key_dir)
+    fhe_model_client.generate_private_and_evaluation_keys(force=True)
+    evaluation_keys = fhe_model_client.get_serialized_evaluation_keys(include_tfhers_key=False)
+    fhe_model_server = FHEModelServer(path_dir=network.server_dir.name)
+    # The raw index sample goes to the client; the server then runs on the serialized result.
+    q_x_encrypted_serialized = fhe_model_client.quantize_encrypt_serialize(sample)
+    q_y_encrypted_serialized = fhe_model_server.run(q_x_encrypted_serialized, evaluation_keys)
+    # Back on the client: deserialize, decrypt and de-quantize the server's output(s).
+    client_result = fhe_model_client.deserialize_decrypt_dequantize(
+        *to_tuple(q_y_encrypted_serialized)
+    )
+    simulate_result = quantized_module.forward(sample, fhe="simulate")
+    # The deployed client/server result must match simulation on the same sample (atol=1e-2).
+    numpy.testing.assert_allclose(client_result, simulate_result, atol=1e-2)
+
+
 def check_client_server_files(model, mode="inference"):
     """Test the client server interface API generates the expected file.