Skip to content

Commit 97617ff

Browse files
Gasoonjia authored and facebook-github-bot committed
Extend PyBundledModule with extension.BundledModule (#12839)
Summary: Pull Request resolved: #12839 Differential Revision: D78938344
1 parent 0479dcd commit 97617ff

File tree

7 files changed

+115
-125
lines changed

7 files changed

+115
-125
lines changed

CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -707,6 +707,14 @@ if(EXECUTORCH_BUILD_PYBIND)
707707
torch
708708
)
709709

710+
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
711+
if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID OR APPLE)
712+
list(APPEND _dep_libs extension_module_static)
713+
else()
714+
list(APPEND _dep_libs extension_module)
715+
endif()
716+
endif()
717+
710718
if(EXECUTORCH_BUILD_TESTS)
711719
list(APPEND _dep_libs test_backend_compiler_lib)
712720
endif()
@@ -764,6 +772,17 @@ if(EXECUTORCH_BUILD_PYBIND)
764772
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
765773
target_link_libraries(portable_lib PRIVATE ${_dep_libs})
766774

775+
# Set RPATH for portable_lib to find libextension_module.so
776+
if(APPLE)
777+
set_target_properties(portable_lib PROPERTIES
778+
BUILD_RPATH "@loader_path"
779+
INSTALL_RPATH "@loader_path")
780+
else()
781+
set_target_properties(portable_lib PROPERTIES
782+
BUILD_RPATH "$ORIGIN"
783+
INSTALL_RPATH "$ORIGIN")
784+
endif()
785+
767786
install(
768787
TARGETS portable_lib
769788
EXPORT ExecuTorchTargets

devtools/bundled_program/test/test_end2end.py

Lines changed: 1 addition & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -5,21 +5,7 @@
55
# LICENSE file in the root directory of this source tree.
66

77
# flake8: noqa: F401
8-
import functools
9-
import inspect
10-
import os
11-
import random
128
import unittest
13-
from typing import Callable, Dict, Optional, Tuple, Type
14-
15-
import executorch.exir as exir
16-
17-
import executorch.exir.control_flow as control_flow
18-
19-
# @manual=//executorch/extension/pytree:pybindings
20-
import executorch.extension.pytree as pytree
21-
22-
import torch
239

2410
from executorch.devtools.bundled_program.core import BundledProgram
2511
from executorch.devtools.bundled_program.serialize import (
@@ -35,8 +21,6 @@
3521
try:
3622
from executorch.extension.pybindings.portable_lib import (
3723
_load_bundled_program_from_buffer,
38-
_load_for_executorch_from_buffer,
39-
_load_for_executorch_from_bundled_program,
4024
)
4125

4226
kernel_mode = "lean"
@@ -47,8 +31,6 @@
4731
try:
4832
from executorch.extension.pybindings.aten_lib import ( # @manual=//executorch/extension/pybindings:aten_lib
4933
_load_bundled_program_from_buffer,
50-
_load_for_executorch_from_buffer,
51-
_load_for_executorch_from_bundled_program,
5234
)
5335

5436
assert kernel_mode is None
@@ -75,19 +57,8 @@ def test_sample_model_e2e(self):
7557
bundled_program_buffer
7658
)
7759

78-
executorch_module = _load_for_executorch_from_bundled_program(
79-
executorch_bundled_program
80-
)
81-
8260
for method_name in eager_model.method_names:
83-
executorch_module.load_bundled_input(
84-
executorch_bundled_program,
85-
method_name,
86-
0,
87-
)
88-
executorch_module.plan_execute(method_name)
89-
executorch_module.verify_result_with_bundled_expected_output(
90-
executorch_bundled_program,
61+
executorch_bundled_program.verify_result_with_bundled_expected_output(
9162
method_name,
9263
0,
9364
)

extension/pybindings/README.md

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,6 @@ CMAKE_ARGS="-DEXECUTORCH_BUILD_MPS=ON" ./install_executorch.sh
2727
- `_reset_profile_results()`: Reset profile results.
2828
## Classes
2929
### ExecuTorchModule
30-
- `load_bundled_input()`: Load bundled input.
31-
- `verify_result_with_bundled_expected_output(bundle: str, method_name: str, testset_idx: int, rtol: float = 1e-5, atol: float = 1e-8)`: Verify result with bundled expected output.
3230
- `plan_execute()`: Plan and execute.
3331
- `run_method()`: Run method.
3432
- `forward()`: Forward. This takes a pytree-flattend PyTorch-tensor-based input.
@@ -37,5 +35,6 @@ CMAKE_ARGS="-DEXECUTORCH_BUILD_MPS=ON" ./install_executorch.sh
3735
- `__call__()`: Call method.
3836
### BundledModule
3937
This class is currently empty and serves as a placeholder for future methods and attributes.
38+
- `verify_result_with_bundled_expected_output(method_name: str, testset_idx: int, rtol: float = 1e-5, atol: float = 1e-8)`: Verify result with bundled expected output.
4039
## Note
4140
All functions and methods are guarded by a call guard that redirects `cout` and `cerr` to the Python environment.

extension/pybindings/pybindings.cpp

Lines changed: 82 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <executorch/extension/data_loader/buffer_data_loader.h>
2424
#include <executorch/extension/data_loader/mmap_data_loader.h>
2525
#include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
26+
#include <executorch/extension/module/bundled_module.h>
2627
#include <executorch/extension/threadpool/threadpool.h>
2728
#include <executorch/runtime/backend/interface.h>
2829
#include <executorch/runtime/core/data_loader.h>
@@ -81,6 +82,7 @@ using ::executorch::ET_RUNTIME_NAMESPACE::Program;
8182
using ::executorch::extension::BufferDataLoader;
8283
using ::executorch::extension::MallocMemoryAllocator;
8384
using ::executorch::extension::MmapDataLoader;
85+
using ::executorch::extension::ET_BUNDLED_MODULE_NAMESPACE::BundledModule;
8486
using ::executorch::runtime::ArrayRef;
8587
using ::executorch::runtime::DataLoader;
8688
using ::executorch::runtime::Error;
@@ -425,13 +427,54 @@ inline std::unique_ptr<Module> load_module_from_file(
425427
program_verification);
426428
}
427429

430+
inline py::list get_outputs_as_py_list(
431+
const std::vector<EValue>& outputs,
432+
bool clone_outputs = true) {
433+
const auto outputs_size = outputs.size();
434+
py::list list(outputs_size);
435+
for (size_t i = 0; i < outputs_size; ++i) {
436+
auto& v = outputs[i];
437+
if (Tag::None == v.tag) {
438+
list[i] = py::none();
439+
} else if (Tag::Int == v.tag) {
440+
list[i] = py::cast(v.toInt());
441+
} else if (Tag::Double == v.tag) {
442+
list[i] = py::cast(v.toDouble());
443+
} else if (Tag::Bool == v.tag) {
444+
list[i] = py::cast(v.toBool());
445+
} else if (Tag::String == v.tag) {
446+
list[i] = py::cast(std::string(v.toString().data()));
447+
} else if (Tag::Tensor == v.tag) {
448+
#ifdef USE_ATEN_LIB
449+
// Clone so the outputs in python do not share a lifetime with the
450+
// module object
451+
if (clone_outputs) {
452+
list[i] = py::cast(v.toTensor().clone());
453+
} else {
454+
list[i] = py::cast(v.toTensor());
455+
}
456+
#else
457+
if (clone_outputs) {
458+
list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()).clone());
459+
} else {
460+
list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()));
461+
}
462+
#endif
463+
} else {
464+
ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
465+
}
466+
}
467+
return list;
468+
}
469+
428470
static constexpr size_t kDEFAULT_BUNDLED_INPUT_POOL_SIZE = 16 * 1024U;
429471

430-
struct PyBundledModule final {
472+
struct PyBundledModule : public BundledModule {
431473
explicit PyBundledModule(
432474
const py::bytes& buffer,
433475
uint32_t bundled_input_pool_size)
434-
: bundled_program_ptr_(buffer),
476+
: BundledModule(buffer.cast<std::string_view>().data()),
477+
bundled_program_ptr_(buffer),
435478
program_ptr_(static_cast<const void*>(
436479
bundled_program_flatbuffer::GetBundledProgram(
437480
get_bundled_program_ptr())
@@ -460,6 +503,33 @@ struct PyBundledModule final {
460503
return program_len_;
461504
}
462505

506+
py::list verify_result_with_bundled_expected_output(
507+
const std::string& method_name,
508+
size_t testset_idx,
509+
double rtol = 1e-5,
510+
double atol = 1e-8) {
511+
// Execute the method
512+
auto result = BundledModule::execute(method_name, testset_idx);
513+
if (!result.ok()) {
514+
THROW_IF_ERROR(
515+
result.error(),
516+
"Method execution failed with status 0x%" PRIx32,
517+
static_cast<uint32_t>(result.error()));
518+
}
519+
520+
// Convert outputs to py::list
521+
const auto& outputs = result.get();
522+
py::list py_outputs = get_outputs_as_py_list(outputs);
523+
524+
Error status = BundledModule::verify_method_outputs(
525+
method_name, testset_idx, rtol, atol);
526+
THROW_IF_ERROR(
527+
status,
528+
"Result verification failed with status %" PRIu32,
529+
static_cast<uint32_t>(status));
530+
return py_outputs;
531+
}
532+
463533
private:
464534
// Store the bytes object instead of a raw pointer so that this module will
465535
// keep the bytes alive.
@@ -853,43 +923,6 @@ struct PyModule final {
853923
}
854924
}
855925

856-
void load_bundled_input(
857-
PyBundledModule& m,
858-
const std::string method_name,
859-
size_t testset_idx) {
860-
const void* bundled_program_ptr = m.get_bundled_program_ptr();
861-
Error status = executorch::BUNDLED_PROGRAM_NAMESPACE::load_bundled_input(
862-
module_->get_method(method_name), bundled_program_ptr, testset_idx);
863-
THROW_IF_ERROR(
864-
status,
865-
"load_bundled_input failed with status 0x%" PRIx32,
866-
static_cast<uint32_t>(status));
867-
}
868-
869-
py::list verify_result_with_bundled_expected_output(
870-
PyBundledModule& m,
871-
const std::string method_name,
872-
size_t testset_idx,
873-
double rtol = 1e-5,
874-
double atol = 1e-8) {
875-
const void* bundled_program_ptr = m.get_bundled_program_ptr();
876-
auto& method = module_->get_method(method_name);
877-
Error status = executorch::BUNDLED_PROGRAM_NAMESPACE::load_bundled_input(
878-
method, bundled_program_ptr, testset_idx);
879-
THROW_IF_ERROR(
880-
status,
881-
"load_bundled_input failed with status 0x%" PRIx32,
882-
static_cast<uint32_t>(status));
883-
py::list outputs = plan_execute(method_name);
884-
status = executorch::BUNDLED_PROGRAM_NAMESPACE::verify_method_outputs(
885-
method, bundled_program_ptr, testset_idx, rtol, atol);
886-
THROW_IF_ERROR(
887-
status,
888-
"Result verification failed with status %" PRIu32,
889-
static_cast<uint32_t>(status));
890-
return outputs;
891-
}
892-
893926
py::list plan_execute(
894927
const std::string method_name,
895928
bool clone_outputs = true) {
@@ -912,46 +945,6 @@ struct PyModule final {
912945
return get_outputs_as_py_list(outputs, clone_outputs);
913946
}
914947

915-
py::list get_outputs_as_py_list(
916-
const std::vector<EValue>& outputs,
917-
bool clone_outputs = true) {
918-
const auto outputs_size = outputs.size();
919-
py::list list(outputs_size);
920-
for (size_t i = 0; i < outputs_size; ++i) {
921-
auto& v = outputs[i];
922-
if (Tag::None == v.tag) {
923-
list[i] = py::none();
924-
} else if (Tag::Int == v.tag) {
925-
list[i] = py::cast(v.toInt());
926-
} else if (Tag::Double == v.tag) {
927-
list[i] = py::cast(v.toDouble());
928-
} else if (Tag::Bool == v.tag) {
929-
list[i] = py::cast(v.toBool());
930-
} else if (Tag::String == v.tag) {
931-
list[i] = py::cast(std::string(v.toString().data()));
932-
} else if (Tag::Tensor == v.tag) {
933-
#ifdef USE_ATEN_LIB
934-
// Clone so the outputs in python do not share a lifetime with the
935-
// module object
936-
if (clone_outputs) {
937-
list[i] = py::cast(v.toTensor().clone());
938-
} else {
939-
list[i] = py::cast(v.toTensor());
940-
}
941-
#else
942-
if (clone_outputs) {
943-
list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()).clone());
944-
} else {
945-
list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()));
946-
}
947-
#endif
948-
} else {
949-
ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
950-
}
951-
}
952-
return list;
953-
}
954-
955948
std::unique_ptr<PyMethodMeta> method_meta(const std::string method_name) {
956949
auto& method = module_->get_method(method_name);
957950
return std::make_unique<PyMethodMeta>(module_, method.method_meta());
@@ -1583,16 +1576,6 @@ PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
15831576
call_guard);
15841577

15851578
py::class_<PyModule>(m, "ExecuTorchModule")
1586-
.def("load_bundled_input", &PyModule::load_bundled_input, call_guard)
1587-
.def(
1588-
"verify_result_with_bundled_expected_output",
1589-
&PyModule::verify_result_with_bundled_expected_output,
1590-
py::arg("bundle"),
1591-
py::arg("method_name"),
1592-
py::arg("testset_idx"),
1593-
py::arg("rtol") = 1e-5,
1594-
py::arg("atol") = 1e-8,
1595-
call_guard)
15961579
.def(
15971580
"plan_execute",
15981581
&PyModule::plan_execute,
@@ -1638,7 +1621,16 @@ PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
16381621
py::arg("clone_outputs") = true,
16391622
call_guard);
16401623

1641-
py::class_<PyBundledModule>(m, "BundledModule");
1624+
py::class_<PyBundledModule>(m, "BundledModule")
1625+
.def(
1626+
"verify_result_with_bundled_expected_output",
1627+
&PyBundledModule::verify_result_with_bundled_expected_output,
1628+
py::arg("method_name"),
1629+
py::arg("testset_idx"),
1630+
py::arg("rtol") = 1e-5,
1631+
py::arg("atol") = 1e-8,
1632+
call_guard);
1633+
16421634
py::class_<PyTensorInfo>(m, "TensorInfo")
16431635
.def("sizes", &PyTensorInfo::sizes, call_guard)
16441636
.def("dtype", &PyTensorInfo::dtype, call_guard)

kernels/quantized/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ if(NOT CMAKE_GENERATOR STREQUAL "Xcode"
115115

116116
# pip wheels will need to be able to find the dependent libraries. On
117117
# Linux, the .so has non-absolute dependencies on libs like
118-
# "_portable_lib.so" without paths; as long as we `import torch` first,
118+
# "_portable_lib.so" and "libextension_module.so" without paths; as long as we `import torch` first,
119119
# those dependencies will work. But Apple dylibs do not support
120120
# non-absolute dependencies, so we need to tell the loader where to look
121121
# for its libraries. The LC_LOAD_DYLIB entries for the portable_lib
@@ -124,9 +124,9 @@ if(NOT CMAKE_GENERATOR STREQUAL "Xcode"
124124
# installed location of our _portable_lib.so file. To see these LC_*
125125
# values, run `otool -l libquantized_ops_lib.dylib`.
126126
if(APPLE)
127-
set(RPATH "@loader_path/../../extensions/pybindings")
127+
set(RPATH "@loader_path/../../extension/pybindings")
128128
else()
129-
set(RPATH "$ORIGIN/../../extensions/pybindings")
129+
set(RPATH "$ORIGIN/../../extension/pybindings")
130130
endif()
131131
set_target_properties(
132132
quantized_ops_aot_lib PROPERTIES BUILD_RPATH ${RPATH} INSTALL_RPATH

shim_et/xplat/executorch/extension/pybindings/pybindings.bzl

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@ PORTABLE_MODULE_DEPS = [
1616
"//executorch/extension/data_loader:buffer_data_loader",
1717
"//executorch/extension/data_loader:mmap_data_loader",
1818
"//executorch/extension/memory_allocator:malloc_memory_allocator",
19+
"//executorch/extension/module:module",
20+
"//executorch/extension/module:bundled_module",
1921
"//executorch/runtime/executor/test:test_backend_compiler_lib",
2022
"//executorch/devtools/etdump:etdump_flatcc",
2123
] + get_all_cpu_backend_targets()
@@ -28,6 +30,8 @@ ATEN_MODULE_DEPS = [
2830
"//executorch/extension/data_loader:buffer_data_loader",
2931
"//executorch/extension/data_loader:mmap_data_loader",
3032
"//executorch/extension/memory_allocator:malloc_memory_allocator",
33+
"//executorch/extension/module:module_aten",
34+
"//executorch/extension/module:bundled_module_aten",
3135
"//executorch/devtools/bundled_program:runtime_aten",
3236
"//executorch/runtime/executor/test:test_backend_compiler_lib_aten",
3337
"//executorch/devtools/etdump:etdump_flatcc",

tools/cmake/preset/default.cmake

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -272,6 +272,11 @@ check_required_options_on(
272272
EXECUTORCH_BUILD_EXTENSION_DATA_LOADER EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR
273273
)
274274

275+
check_required_options_on(
276+
IF_ON EXECUTORCH_BUILD_PYBIND REQUIRES
277+
EXECUTORCH_BUILD_EXTENSION_MODULE
278+
)
279+
275280
check_required_options_on(
276281
IF_ON EXECUTORCH_BUILD_KERNELS_LLM REQUIRES
277282
EXECUTORCH_BUILD_KERNELS_OPTIMIZED

0 commit comments

Comments (0)