fix torch module wrapper serialization error (#21505)

divyashreepathihalli · web-flow · commit e704b460a5c9 · 2025-07-25T14:12:57.000-07:00
* fix torch module wrapper serialization error

* make fix narrower

* address review comments

* fix gpu tests

* fix error
diff --git a/keras/src/utils/torch_utils.py b/keras/src/utils/torch_utils.py
@@ -1,3 +1,4 @@
+import base64
 import io
 
 from packaging.version import parse
@@ -152,8 +153,10 @@ def get_config(self):
 
         buffer = io.BytesIO()
         torch.save(self.module, buffer)
+        # Encode the buffer using base64 to ensure safe serialization
+        buffer_b64 = base64.b64encode(buffer.getvalue()).decode("ascii")
         config = {
-            "module": buffer.getvalue(),
+            "module": buffer_b64,
             "output_shape": self.output_shape,
         }
         return {**base_config, **config}
@@ -163,7 +166,9 @@ def from_config(cls, config):
         import torch
 
         if "module" in config:
-            buffer = io.BytesIO(config["module"])
+            # Decode the base64 string back to bytes
+            buffer_bytes = base64.b64decode(config["module"].encode("ascii"))
+            buffer = io.BytesIO(buffer_bytes)
             config["module"] = torch.load(buffer, weights_only=False)
         return cls(**config)
 
diff --git a/keras/src/utils/torch_utils_test.py b/keras/src/utils/torch_utils_test.py
@@ -11,6 +11,7 @@
 from keras.src import models
 from keras.src import saving
 from keras.src import testing
+from keras.src.backend.torch.core import get_device
 from keras.src.utils.torch_utils import TorchModuleWrapper
 
 
@@ -246,3 +247,27 @@ def test_build_model(self):
         model = keras.Model(x, y)
         self.assertEqual(model.predict(np.zeros([5, 4])).shape, (5, 16))
         self.assertEqual(model(np.zeros([5, 4])).shape, (5, 16))
+
+    def test_save_load(self):
+        """Weights survive a `.keras` save/load of a torch-backed model."""
+        @keras.saving.register_keras_serializable()
+        class M(keras.Model):
+            def __init__(self, channels=10, **kwargs):
+                super().__init__(**kwargs)  # forward base kwargs, e.g. `name`
+                self.sequence = torch.nn.Sequential(
+                    torch.nn.Conv2d(1, channels, kernel_size=(3, 3)),
+                )
+
+            def call(self, x):
+                return self.sequence(x)
+
+        m = M()
+        x = torch.ones((10, 1, 28, 28), device=get_device())  # backend device
+        m(x)
+        temp_filepath = os.path.join(self.get_temp_dir(), "mymodel.keras")
+        m.save(temp_filepath)
+        new_model = saving.load_model(temp_filepath)
+        ref_weights, new_weights = m.get_weights(), new_model.get_weights()
+        self.assertEqual(len(ref_weights), len(new_weights))  # zip truncates
+        for ref_w, new_w in zip(ref_weights, new_weights):
+            self.assertAllClose(ref_w, new_w, atol=1e-5)