Commit 5b4cb9a

lucylq authored and facebook-github-bot committed

Add program-data separation to pybindings (#13886)

Summary: Add support for an optional data path in the pybindings, allowing a .ptd data file to be loaded alongside the .pte program.

Differential Revision: D76353209

1 parent 6c1ef96 · commit 5b4cb9a
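
Usage note: program-data separation keeps the program in a .pte file and moves constant tensor data into a companion .ptd file, which the pybindings can now mmap alongside the program. A minimal sketch of the new call; the file names are placeholders, and the import assumes the portable_lib pybindings target:

# Sketch of the API surface added by this commit.
# "model.pte" / "model.ptd" are placeholder paths.
from executorch.extension.pybindings.portable_lib import _load_for_executorch

# Previously only a single program path could be passed:
# module = _load_for_executorch("model.pte")

# Now an optional data path supplies the external tensor data:
module = _load_for_executorch("model.pte", data_path="model.ptd")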

File tree

4 files changed: +104, -21 lines

extension/pybindings/pybindings.cpp (52 additions, 19 deletions)

@@ -173,27 +173,51 @@ inline std::unique_ptr<Module> load_module_from_buffer(
 }
 
 inline std::unique_ptr<Module> load_module_from_file(
-    const std::string& path,
+    const std::string& program_path,
+    std::optional<const std::string>& data_map_path,
     bool enable_etdump,
     size_t debug_buffer_size,
     Program::Verification program_verification) {
   EXECUTORCH_SCOPE_PROF("load_module_from_file");
 
-  Result<MmapDataLoader> res = MmapDataLoader::from(
-      path.c_str(), MmapDataLoader::MlockConfig::UseMlockIgnoreErrors);
+  Result<MmapDataLoader> program_loader_res = MmapDataLoader::from(
+      program_path.c_str(), MmapDataLoader::MlockConfig::UseMlockIgnoreErrors);
   THROW_IF_ERROR(
-      res.error(),
+      program_loader_res.error(),
       "Failed to create MmapDataLoader from file %s, error: 0x:%" PRIx32,
-      path.c_str(),
-      static_cast<uint32_t>(res.error()));
-
-  auto loader = std::make_unique<MmapDataLoader>(std::move(res.get()));
-  return std::make_unique<Module>(
-      std::move(loader),
-      nullptr, // memory_allocator
-      nullptr, // temp_allocator
-      enable_etdump ? std::make_unique<torch::executor::ETDumpGen>() : nullptr,
-      nullptr); // data_map_loader
+      program_path.c_str(),
+      static_cast<uint32_t>(program_loader_res.error()));
+  auto program_loader =
+      std::make_unique<MmapDataLoader>(std::move(program_loader_res.get()));
+
+  if (data_map_path.has_value()) {
+    Result<MmapDataLoader> data_map_loader_res = MmapDataLoader::from(
+        data_map_path->c_str(),
+        MmapDataLoader::MlockConfig::UseMlockIgnoreErrors);
+    THROW_IF_ERROR(
+        data_map_loader_res.error(),
+        "Failed to create MmapDataLoader from file %s, error: 0x:%" PRIx32,
+        data_map_path->c_str(),
+        static_cast<uint32_t>(data_map_loader_res.error()));
+    auto data_map_loader =
+        std::make_unique<MmapDataLoader>(std::move(data_map_loader_res.get()));
+
+    return std::make_unique<Module>(
+        std::move(program_loader),
+        nullptr, // memory_allocator
+        nullptr, // temp_allocator
+        enable_etdump ? std::make_unique<torch::executor::ETDumpGen>()
+                      : nullptr,
+        std::move(data_map_loader)); // data_map_loader
+  } else {
+    return std::make_unique<Module>(
+        std::move(program_loader),
+        nullptr, // memory_allocator
+        nullptr, // temp_allocator
+        enable_etdump ? std::make_unique<torch::executor::ETDumpGen>()
+                      : nullptr,
+        nullptr); // data_map_loader
+  }
 }
 
 inline py::list get_outputs_as_py_list(
@@ -495,13 +519,15 @@ struct PyModule final {
           program_verification)) {}
 
   explicit PyModule(
-      const std::string& path,
+      const std::string& program_path,
+      std::optional<const std::string>& data_path,
       bool enable_etdump,
      size_t debug_buffer_size = 0,
       Program::Verification program_verification =
           Program::Verification::InternalConsistency)
       : module_(load_module_from_file(
-            path,
+            program_path,
+            data_path,
             enable_etdump,
             debug_buffer_size,
             program_verification)) {}
@@ -521,14 +547,20 @@ struct PyModule final {
     return std::make_unique<PyModule>(
         buffer, enable_etdump, debug_buffer_size, program_verification);
   }
+
   static std::unique_ptr<PyModule> load_from_file(
-      const std::string& path,
+      const std::string& program_path,
+      std::optional<const std::string>& data_path,
       bool enable_etdump,
       size_t debug_buffer_size = 0,
       Program::Verification program_verification =
          Program::Verification::InternalConsistency) {
     return std::make_unique<PyModule>(
-        path, enable_etdump, debug_buffer_size, program_verification);
+        program_path,
+        data_path,
+        enable_etdump,
+        debug_buffer_size,
+        program_verification);
   }
 
   static std::unique_ptr<PyModule> load_from_bundled_program(
@@ -1301,7 +1333,8 @@ PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
   m.def(
       "_load_for_executorch",
      PyModule::load_from_file,
-      py::arg("path"),
+      py::arg("program_path"),
+      py::arg("data_path") = std::nullopt,
      py::arg("enable_etdump") = false,
      py::arg("debug_buffer_size") = 0,
      py::arg("program_verification") =

extension/pybindings/pybindings.pyi (4 additions, 2 deletions)

@@ -147,7 +147,8 @@ class MethodMeta:
 
 @experimental("This API is experimental and subject to change without notice.")
 def _load_for_executorch(
-    path: str,
+    program_path: str,
+    data_path: Optional[str] = None,
     enable_etdump: bool = False,
     debug_buffer_size: int = 0,
     program_verification: Verification = Verification.InternalConsistency,
@@ -159,7 +160,8 @@ def _load_for_executorch(
     This API is experimental and subject to change without notice.
 
     Args:
-        path: File path to the ExecuTorch program as a string.
+        program_path: File path to the ExecuTorch program as a string.
+        data_path: File path to a .ptd file containing data used by the program.
         enable_etdump: If true, enables an ETDump which can store profiling information.
             See documentation at https://pytorch.org/executorch/main/etdump
             for how to use it.
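
Since data_path expects a .ptd file, here is a hedged sketch of producing one. It mirrors the steps of the test_program_data_separation test added below; exec_program.buffer, exec_program._tensor_data, and the '_default_external_constant' key are taken from that test, and the file names are placeholders:

# Sketch: export a module with its constants split out, yielding the
# .pte/.ptd pair that _load_for_executorch(program_path, data_path) expects.
import torch
from executorch.exir import ExecutorchBackendConfig, to_edge
from torch.export import export

model = torch.nn.Linear(3, 3)
exported = export(model, (torch.randn(3),), strict=True)
exec_program = to_edge(exported).to_executorch(
    config=ExecutorchBackendConfig(external_constants=True)
)
with open("linear.pte", "wb") as f:
    f.write(exec_program.buffer)
with open("linear.ptd", "wb") as f:
    # "_default_external_constant" is the blob name produced by
    # external_constants=True, per the test below.
    f.write(bytes(exec_program._tensor_data.pop("_default_external_constant")))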

extension/pybindings/test/make_test.py (15 additions, 0 deletions)

@@ -133,6 +133,21 @@ def get_inputs(self):
         return (torch.ones(2, 2), torch.ones(2, 2))
 
 
+class ModuleLinear(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = torch.nn.Linear(3, 3)
+
+    def forward(self, x: torch.Tensor):
+        return self.linear(x)
+
+    def get_methods_to_export(self):
+        return ("forward",)
+
+    def get_inputs(self):
+        return (torch.randn(3),)
+
+
 def create_program(
     eager_module: torch.nn.Module,
     et_config: Optional[ExecutorchBackendConfig] = None,

extension/pybindings/test/test_pybindings.py (33 additions, 0 deletions)

@@ -22,6 +22,7 @@
     ModuleAddWithAttributes,
     ModuleChannelsLast,
     ModuleChannelsLastInDefaultOut,
+    ModuleLinear,
     ModuleMulti,
 )
 from torch.export import export
@@ -600,3 +601,35 @@ def test_method_method_meta(self) -> None:
         self.assertEqual(output_tensor.is_memory_planned(), True)
         self.assertEqual(output_tensor.nbytes(), 16)
         self.assertEqual(str(output_tensor), tensor_info)
+
+    def test_program_data_separation(self) -> None:
+        eager_module = ModuleLinear()
+        inputs = eager_module.get_inputs()
+        exported_program = export(eager_module, inputs, strict=True)
+        exec_program = to_edge(exported_program).to_executorch(
+            config=ExecutorchBackendConfig(
+                # Move all tensor data to '_default_external_constant' file.
+                external_constants=True,
+            )
+        )
+
+        import os
+        import tempfile
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            pte_file = os.path.join(tmpdir, "linear.pte")
+            with open(pte_file, "wb") as f:
+                f.write(exec_program.buffer)
+
+            ptd_file = os.path.join(tmpdir, "linear.ptd")
+            with open(ptd_file, "wb") as ptd:
+                tensor_data = bytes(
+                    exec_program._tensor_data.pop("_default_external_constant")
+                )
+                ptd.write(tensor_data)
+
+            executorch_program = self.runtime._load_for_executorch(pte_file, ptd_file)
+
+            expected = eager_module(inputs[0])
+            executorch_output = executorch_program.forward(inputs)[0]
+            self.assertTrue(torch.allclose(expected, executorch_output))
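
Follow-up note: the test round-trips the weights through the new data_path argument and checks the ExecuTorch output against eager execution with torch.allclose. It reaches into exec_program._tensor_data, a private attribute, to pop the '_default_external_constant' blob that to_executorch produces when external_constants=True. Assuming the pybindings are built (e.g. via the EXECUTORCH_BUILD_PYBIND CMake option), the new case can likely be run on its own with: python -m pytest extension/pybindings/test/test_pybindings.py -k test_program_data_separation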
