From b1ce3e940a8e161198d65eefc7760f10ecd74fac Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Fri, 19 Sep 2025 15:48:03 -0700 Subject: [PATCH] Add XNNPACK backend option for workspace sharing (re-land) (#13934) Summary: **Note: This is a re-land, fixing a use after free which occurred when destroying a delegate instance. The executor is destroyed, which frees the workspace. The mutex that raii_lock points to is owned by the workspace. There is then a use after free when raii_lock goes out of scope. This is fixed by taking an owning reference to the workspace in destroy.** Add a backend option for XNNPACK to enable runtime control of workspace sharing. I've added 3 mode options - Disabled, PerModel, and Global. PerModel shares the workspace between all CALL_DELEGATE instances in a model, keyed by memory allocator address (see below). Global uses a single workspace instance. I've written the code to allow for the active workspace mode to be safely changed at any time. The workspace instance is resolved at delegate instance init time (model load) and is stored in the XNNExecutor instance. This design will also allow us to set per-model sharing options in the future. I've introduced a wrapper class (XNNWorkspace) to help with synchronization. With regard to the PerModel behavior, I am using the address of the runtime allocator to disambiguate the model. This is not ideal in the long-run, but there is some larger discussion around generating IDs in a coherent manner in multithreaded environments without synchronization in the core runtime. This might require PAL changes (exposing a thread ID, for example), so I intend to come back to this. It should be possible to transparently update this logic in the future. The program ID can collide or change without affecting correctness, but may increase memory (for collisions) or enforce extra synchronization (if unstable between delegate instances in a method). I'd like to add a PerMethod mode as a follow-up. 
This should be keyed to the specific method instance (not name), such that multiple method instances for the same method can be loaded for execution on different threads without forcing synchronization, but still allow sharing between call delegate instances in each method instance. This will require a unique method identifier. Reviewed By: digantdesai Differential Revision: D81647105 --- backends/test/multi_method_delegate_test.cpp | 86 ++++-- backends/xnnpack/runtime/XNNCompiler.cpp | 14 +- backends/xnnpack/runtime/XNNExecutor.h | 10 +- backends/xnnpack/runtime/XNNPACKBackend.cpp | 147 ++++++--- backends/xnnpack/runtime/XNNPACKBackend.h | 44 +++ backends/xnnpack/runtime/XNNWorkspace.h | 67 +++++ .../xnnpack/runtime/XNNWorkspaceManager.cpp | 130 ++++++++ .../xnnpack/runtime/XNNWorkspaceManager.h | 94 ++++++ backends/xnnpack/targets.bzl | 3 + .../test/runtime/test_workspace_manager.cpp | 280 ++++++++++++++++++ .../test/runtime/test_workspace_sharing.cpp | 179 +++++++++++ .../xnnpack/test/runtime/test_xnnexecutor.cpp | 2 +- backends/xnnpack/test/targets.bzl | 23 ++ .../executorch/build/build_variables.bzl | 1 + 14 files changed, 999 insertions(+), 81 deletions(-) create mode 100644 backends/xnnpack/runtime/XNNPACKBackend.h create mode 100644 backends/xnnpack/runtime/XNNWorkspace.h create mode 100644 backends/xnnpack/runtime/XNNWorkspaceManager.cpp create mode 100644 backends/xnnpack/runtime/XNNWorkspaceManager.h create mode 100644 backends/xnnpack/test/runtime/test_workspace_manager.cpp create mode 100644 backends/xnnpack/test/runtime/test_workspace_sharing.cpp diff --git a/backends/test/multi_method_delegate_test.cpp b/backends/test/multi_method_delegate_test.cpp index e24585434c4..bf17d7c8743 100644 --- a/backends/test/multi_method_delegate_test.cpp +++ b/backends/test/multi_method_delegate_test.cpp @@ -5,6 +5,10 @@ #include #include +#include + +#include +#include #include #include @@ -12,6 +16,11 @@ #include #include +using 
executorch::backends::xnnpack::workspace_sharing_mode_option_key; +using executorch::backends::xnnpack::WorkspaceSharingMode; +using executorch::backends::xnnpack::xnnpack_backend_key; + +using executorch::runtime::BackendOptions; using executorch::runtime::Error; using executorch::runtime::EValue; using executorch::runtime::HierarchicalAllocator; @@ -126,34 +135,61 @@ class XNNPACKMultiDelegateTest : public ETPTEMethodRunBaseTest { num_threads = 40; kMethodName = "forward"; } -}; -// This test is to validate the assumption that the delegate is thread safe. -// That includes the following: -// 1. The delegate can be initilized by multiple threads in parallel. -// 2. The delegate can be executed by multiple threads in parallel. -// 3. The delegate can be destroyed by multiple threads in parallel. -// Regardless of the underlying implementation of the delegate. -// This is particularly important when we have shared resources across -// delegate instances through a singleton backend instance. -TEST_F(XNNPACKMultiDelegateTest, MultipleThreads) { - ASSERT_NE(kTestPTE1Path.size(), 0); - ASSERT_NE(kTestPTE2Path.size(), 0); - ASSERT_NE(num_threads, 0); - ASSERT_NE(kMethodName.size(), 0); - - std::vector threads(num_threads); - std::atomic count{0}; - - for (int i = 0; i < num_threads; i++) { - threads[i] = std::thread([&, i]() { - run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count); - }); + // This test is to validate the assumption that the delegate is thread safe. + // That includes the following: + // 1. The delegate can be initialized by multiple threads in parallel. + // 2. The delegate can be executed by multiple threads in parallel. + // 3. The delegate can be destroyed by multiple threads in parallel. + // Regardless of the underlying implementation of the delegate. + // This is particularly important when we have shared resources across + // delegate instances through a singleton backend instance. 
+ void runStressTest() { + ASSERT_NE(kTestPTE1Path.size(), 0); + ASSERT_NE(kTestPTE2Path.size(), 0); + ASSERT_NE(num_threads, 0); + ASSERT_NE(kMethodName.size(), 0); + + std::vector threads(num_threads); + std::atomic count{0}; + + for (int i = 0; i < num_threads; i++) { + threads[i] = std::thread([&, i]() { + run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count); + }); + } + for (int i = 0; i < num_threads; i++) { + threads[i].join(); + } + ASSERT_EQ(count, num_threads); } - for (int i = 0; i < num_threads; i++) { - threads[i].join(); + + void setWorkspaceSharingMode(WorkspaceSharingMode mode) { + executorch::runtime::runtime_init(); + + BackendOptions<1> backend_options; + backend_options.set_option( + workspace_sharing_mode_option_key, static_cast(mode)); + + auto status = executorch::runtime::set_option( + xnnpack_backend_key, backend_options.view()); + ASSERT_EQ(status, Error::Ok); } - ASSERT_EQ(count, num_threads); +}; + +TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsSharingDisabled) { + setWorkspaceSharingMode(WorkspaceSharingMode::Disabled); + runStressTest(); +} + +TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsPerModelSharing) { + setWorkspaceSharingMode(WorkspaceSharingMode::PerModel); + runStressTest(); +} + +TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsGlobalSharing) { + setWorkspaceSharingMode(WorkspaceSharingMode::Global); + runStressTest(); } // TODO(T208989291): Add more tests here. 
For example, diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index 78eaaf6d039..ad927ef8917 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -1895,9 +1895,8 @@ ET_NODISCARD Error XNNCompiler::compileModel( xnn_weights_cache_t weights_cache_ptr = nullptr; #endif -#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE - ET_CHECK_OR_RETURN_ERROR( - workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace"); + // NOLINTBEGIN(facebook-hte-NullableDereference) - weights cache is allowed to + // be null status = xnn_create_runtime_v4( subgraph.get(), weights_cache_ptr, @@ -1905,14 +1904,7 @@ ET_NODISCARD Error XNNCompiler::compileModel( ::executorch::extension::threadpool::get_pthreadpool(), runtime_flags, &runtime_ptr); -#else - status = xnn_create_runtime_v3( - subgraph.get(), - weights_cache_ptr, - ::executorch::extension::threadpool::get_pthreadpool(), - runtime_flags, - &runtime_ptr); -#endif + // NOLINTEND(facebook-hte-NullableDereference) ET_CHECK_OR_RETURN_ERROR( xnn_status_success == status, diff --git a/backends/xnnpack/runtime/XNNExecutor.h b/backends/xnnpack/runtime/XNNExecutor.h index f7084a5dd88..c7926744dd6 100644 --- a/backends/xnnpack/runtime/XNNExecutor.h +++ b/backends/xnnpack/runtime/XNNExecutor.h @@ -9,13 +9,13 @@ #pragma once #include +#include #include #include #include #include #include -#include #include #include @@ -35,9 +35,11 @@ class XNNExecutor { std::vector output_ids_; std::vector externals_; std::vector packed_data_names_; + std::shared_ptr workspace_; public: - XNNExecutor() = default; + XNNExecutor(std::shared_ptr workspace) + : workspace_(workspace) {} inline size_t getNumInputs() { return input_ids_.size(); @@ -51,6 +53,10 @@ class XNNExecutor { return packed_data_names_; } + inline std::shared_ptr get_workspace() { + return workspace_; + } + /** * Initialize the XNNExecutor with a given runtime and input/output ids. 
* The input/output ids are expected to be sorted in order of their diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp index b05919ecf2b..70845b6cab1 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.cpp +++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp @@ -7,7 +7,10 @@ */ #include +#include #include +#include +#include #include #include #include @@ -21,14 +24,18 @@ namespace executorch { namespace backends { +using executorch::backends::xnnpack::WorkspaceSharingMode; +using executorch::backends::xnnpack::XNNWorkspace; using executorch::backends::xnnpack::delegate::XNNWeightsCache; using executorch::ET_RUNTIME_NAMESPACE::Backend; using executorch::ET_RUNTIME_NAMESPACE::BackendExecutionContext; using executorch::ET_RUNTIME_NAMESPACE::BackendInitContext; +using executorch::ET_RUNTIME_NAMESPACE::BackendOptionContext; using executorch::ET_RUNTIME_NAMESPACE::CompileSpec; using executorch::ET_RUNTIME_NAMESPACE::DelegateHandle; using executorch::ET_RUNTIME_NAMESPACE::NamedDataMap; using executorch::runtime::ArrayRef; +using executorch::runtime::BackendOption; using executorch::runtime::Error; using executorch::runtime::EValue; using executorch::runtime::FreeableBuffer; @@ -51,23 +58,8 @@ class XnnpackBackend final return; } -#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE - // Create a workspace for the XNNExecutor to use. This workspace will be - // shared across all delegate instances. 
- ET_LOG(Debug, "Creating XNN workspace"); - xnn_workspace_t workspace = nullptr; - status = xnn_create_workspace(&workspace); - if (status != xnn_status_success) { - ET_LOG( - Error, - "Failed to create XNN workspace, XNNPACK status: 0x%x", - (unsigned int)status); - workspace = nullptr; - return; - } - workspace_.reset(workspace); - ET_LOG(Debug, "Created XNN workspace: %p", workspace_.get()); -#endif // ENABLE_XNNPACK_SHARED_WORKSPACE + // Workspace manager is initialized with the appropriate default mode in its + // constructor } bool is_available() const override { @@ -85,11 +77,12 @@ class XnnpackBackend final } const NamedDataMap* named_data_map = context.get_named_data_map(); - // thread safe. This can heppen when multiple threads call init() on + // thread safe. This can happen when multiple threads call init() on // the same backend instance. -#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE - const std::lock_guard lock(workspace_mutex_); -#endif + + auto program_id = + reinterpret_cast(context.get_runtime_allocator()); + auto workspace = ET_UNWRAP(get_or_create_workspace(program_id)); #ifdef ENABLE_XNNPACK_WEIGHTS_CACHE const std::lock_guard lock_weight_cache(weights_cache_mutex_); @@ -97,17 +90,19 @@ class XnnpackBackend final context.get_runtime_allocator(), named_data_map); #endif + auto [workspace_lock, workspace_ptr] = workspace->acquire(); + // Executor has been allocated but not constructed, ensure that runtime_ is // nullptr by constructing it in place here. NOTE: Since we use placement // new and since this type is not trivially destructible, we must call the // destructor manually in destroy(). - new (executor) xnnpack::delegate::XNNExecutor; + new (executor) xnnpack::delegate::XNNExecutor(workspace); Error err = xnnpack::delegate::XNNCompiler::compileModel( processed->data(), processed->size(), executor, weights_cache_.get(), - workspace_.get(), + workspace_ptr, named_data_map); // This backend does not need its processed data after compiling the model. 
processed->Free(); @@ -130,14 +125,12 @@ Span args) const override { auto executor = static_cast(handle); -#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE - const std::lock_guard lock(workspace_mutex_); -#endif - #ifdef ENABLE_XNNPACK_WEIGHTS_CACHE const std::lock_guard lock_weights_cache(weights_cache_mutex_); #endif + auto [raii_lock, _] = executor->get_workspace()->acquire(); + // Prepare Inputs/Outputs and Propagate Input Shapes Error err = executor->prepare_args(args); if (err != Error::Ok) { @@ -158,13 +151,6 @@ void destroy(DelegateHandle* handle) const override { if (handle != nullptr) { - // This is needed to serialize access to xnn_delete_runtime which is not - // thread safe. This can heppen when multiple threads call destroy() on - // the same backend instance. -#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE - const std::lock_guard lock(workspace_mutex_); -#endif - auto executor = static_cast(handle); #ifdef ENABLE_XNNPACK_PROFILING @@ -176,18 +162,87 @@ weights_cache_mutex_); weights_cache_->delete_packed_data(executor->get_packed_data_names()); #endif + + // This is needed to serialize access to xnn_delete_runtime which is not + // thread safe. This can happen when multiple threads call destroy() on + // the same backend instance. Make sure to hold onto the workspace + // shared_ptr, as the pointer in the executor is freed, which includes + // the mutex referenced by raii_lock. + auto workspace = executor->get_workspace(); + auto [raii_lock, _] = workspace->acquire(); + + // XNNExecutor is not trivially destructible. Since this was constructed + // manually in init(), we must destroy it manually here. + executor->~XNNExecutor(); + } + } + + Error get_option_internal( + BackendOptionContext& context, + executorch::runtime::Span& + backend_options) const { + // Intentionally not locking here as it is not required. 
+ + // Verify that the expected option key is present and modify the value + for (size_t i = 0; i < backend_options.size(); ++i) { + if (strcmp( + backend_options[i].key, + xnnpack::workspace_sharing_mode_option_key) == 0) { + // Set the value to what was stored by set_option + backend_options[i].value = + static_cast(workspace_manager_.get_sharing_mode()); + } + } + + return Error::Ok; + } + + Error get_option( + BackendOptionContext& context, + executorch::runtime::Span& + backend_options) override { + return get_option_internal(context, backend_options); + } + + Error set_option( + BackendOptionContext& context, + const executorch::runtime::Span& + backend_options) override { + if (backend_options.size() > 0) { + for (const auto& option : backend_options) { + if (strcmp(option.key, xnnpack::workspace_sharing_mode_option_key) == + 0) { + if (auto* val = std::get_if(&option.value)) { + if (*val < 0 || + *val > static_cast(WorkspaceSharingMode::Count)) { + ET_LOG( + Error, + "XNNPACK workspace sharing mode must be between 0 and %d, inclusive, but was %d.", + static_cast(WorkspaceSharingMode::Count), + *val); + return Error::InvalidArgument; + } + + ET_LOG( + Debug, "Setting XNNPACK workspace sharing mode to %d.", *val); + auto status = workspace_manager_.set_sharing_mode( + static_cast(*val)); + if (status != Error::Ok) { + return status; + } + } else { + ET_LOG(Error, "XNNPACK workspace sharing mode must be an integer."); + return Error::InvalidArgument; + } + } + } + } + return Error::Ok; + } + private: - // This is a global workspace for all delegate instances. - mutable std::mutex workspace_mutex_; - std::unique_ptr workspace_{ - nullptr, - &xnn_release_workspace}; + // Workspace manager for handling workspace sharing modes + mutable xnnpack::XNNWorkspaceManager workspace_manager_; // Weights cache is global to all delegate instances. 
mutable std::mutex weights_cache_mutex_; @@ -195,13 +250,21 @@ class XnnpackBackend final std::make_unique(); // Lock Hiearchy for Mutexes: - // workspace_mutex_ // weights_cache_mutex_ + // workspace_meta_mutex_ + // workspace_mutex_ (owned by executor) + + // Retrieve a workspace for the given method ID, depending on the sharing + // mode. + Result> get_or_create_workspace( + uintptr_t program_id) const { + return workspace_manager_.get_or_create_workspace(program_id); + } }; namespace { -auto cls = XnnpackBackend(); -Backend backend{"XnnpackBackend", &cls}; +auto backend_instance = XnnpackBackend(); +Backend backend{xnnpack::xnnpack_backend_key, &backend_instance}; static auto success_with_compiler = register_backend(backend); } // namespace diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h new file mode 100644 index 00000000000..e6930dfeb5c --- /dev/null +++ b/backends/xnnpack/runtime/XNNPACKBackend.h @@ -0,0 +1,44 @@ +#pragma once + +#include + +namespace executorch::backends::xnnpack { +/// The key for the backend. This is used to register the backend, check +/// availability, and get/set options. +const char xnnpack_backend_key[] = "XnnpackBackend"; + +/// The key for the workspace sharing option. See the WorkspaceSharingMode enum +/// for a description of the associated functionality. +const char workspace_sharing_mode_option_key[] = "workspace_sharing_mode"; + +/// Workspace sharing mode. This is a backend option that can be set via the +/// set_option API to control memory sharing between CALL_DELEGATE instances. +/// This is useful for reducing memory consumption. +enum class WorkspaceSharingMode { + /// No workspace sharing. Each CALL_DELEGATE instance will have its own + /// workspace (memory arena). + Disabled = 0, + + /// All CALL_DELEGATE instances in a given program will share a workspace. 
+ /// This reduces memory consumption + /// for methods with multiple delegate calls, at the cost of only allowing one + /// method to execute at a time. + PerModel = 1, + + /// All CALL_DELEGATE instances across all loaded methods will share a + /// workspace. This reduces memory + /// consumption by overlapping activation memory between methods but enforces + /// synchronization between + /// methods. If multiple methods are run concurrently, it may block as only + /// one delegate call can occur + /// at a time. Additionally, the workspace does not shrink when a method is + /// unloaded, so memory will + /// only be reclaimed when all XNNPACK-delegated methods are unloaded. + Global = 2, + + /// The number of workspace sharing modes. This is not a valid mode and is + /// only used for tracking the + /// maximum enum value. + Count, +}; +} // namespace executorch::backends::xnnpack diff --git a/backends/xnnpack/runtime/XNNWorkspace.h b/backends/xnnpack/runtime/XNNWorkspace.h new file mode 100644 index 00000000000..36596b05089 --- /dev/null +++ b/backends/xnnpack/runtime/XNNWorkspace.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include +#include +#include + +namespace executorch::backends::xnnpack { + +using WorkspacePtr = + std::unique_ptr; + +/// A lightweight wrapper around an underlying xnn_workspace_t instance, bundled +/// with appropriate synchronization. +class XNNWorkspace { + public: + XNNWorkspace(WorkspacePtr workspace) : workspace_(std::move(workspace)){}; + XNNWorkspace(const XNNWorkspace&) = delete; + XNNWorkspace& operator=(const XNNWorkspace&) = delete; + // Not moveable due to std::mutex. 
+ XNNWorkspace(XNNWorkspace&&) = delete; + XNNWorkspace& operator=(XNNWorkspace&&) = delete; + + std::pair, xnn_workspace_t> acquire() { + auto lock = std::unique_lock(mutex_); + return {std::move(lock), workspace_.get()}; + } + + // Return the workspace pointer without acquiring the lock. This should be used + // carefully, as it can lead to crashes or data corruption if the workspace is + // used concurrently. + xnn_workspace_t unsafe_get_workspace() { + return workspace_.get(); + } + + static runtime::Result> create() { + // Because this class can't be moved, we need to construct it in-place. + xnn_workspace_t workspace = nullptr; + auto status = xnn_create_workspace(&workspace); + if (status != xnn_status_success) { + ET_LOG( + Error, + "Failed to create XNN workspace, XNNPACK status: 0x%x", + (unsigned int)status); + return runtime::Error::Internal; + } + + return std::make_shared( + WorkspacePtr(workspace, &xnn_release_workspace)); + } + + private: + std::mutex mutex_; + WorkspacePtr workspace_; +}; + +} // namespace executorch::backends::xnnpack diff --git a/backends/xnnpack/runtime/XNNWorkspaceManager.cpp b/backends/xnnpack/runtime/XNNWorkspaceManager.cpp new file mode 100644 index 00000000000..d8c6dae4d6d --- /dev/null +++ b/backends/xnnpack/runtime/XNNWorkspaceManager.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include // For PRIuPTR + +namespace executorch::backends::xnnpack { + +using executorch::runtime::Error; +using executorch::runtime::Result; + +XNNWorkspaceManager::XNNWorkspaceManager() { +#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE + sharing_mode_ = WorkspaceSharingMode::Global; +#else + sharing_mode_ = WorkspaceSharingMode::Disabled; +#endif // ENABLE_XNNPACK_SHARED_WORKSPACE +} + +runtime::Error XNNWorkspaceManager::set_sharing_mode( + WorkspaceSharingMode mode) { + // Validate that the mode is valid + if (static_cast(mode) < 0 || + static_cast(mode) >= static_cast(WorkspaceSharingMode::Count)) { + ET_LOG( + Error, + "XNNPACK workspace sharing mode must be between 0 and %d, inclusive, but was %d.", + static_cast(WorkspaceSharingMode::Count) - 1, + static_cast(mode)); + return runtime::Error::InvalidArgument; + } + + sharing_mode_ = mode; + return runtime::Error::Ok; +} + +WorkspaceSharingMode XNNWorkspaceManager::get_sharing_mode() const { + return sharing_mode_.load(); +} + +Result> +XNNWorkspaceManager::get_or_create_workspace(uintptr_t program_id) const { + auto mode = sharing_mode_.load(); + + // Get or create the workspace according to the current sharing mode. + if (mode == WorkspaceSharingMode::Disabled) { + ET_LOG(Debug, "Instantiating workspace."); + auto create_result = XNNWorkspace::create(); + if (!create_result.ok()) { + return create_result.error(); + } + + return create_result.get(); + } else if (mode == WorkspaceSharingMode::PerModel) { + return get_or_create_model_workspace(program_id); + } else if (mode == WorkspaceSharingMode::Global) { + return get_or_create_global_workspace(); + } else { + ET_LOG( + Error, "Invalid workspace sharing mode: %d.", static_cast(mode)); + return Error::Internal; + } +} + +Result> +XNNWorkspaceManager::get_or_create_global_workspace() const { + std::scoped_lock lock(workspace_meta_mutex_); + + // Check for an existing (live) global workspace. 
+ std::shared_ptr workspace = {}; + if (auto live_workspace = global_workspace_.lock()) { + workspace = live_workspace; + } + + // Allocate a new workspace if needed. + if (!workspace) { + auto create_result = XNNWorkspace::create(); + if (!create_result.ok()) { + return create_result.error(); + } + workspace = create_result.get(); + ET_LOG( + Debug, + "Created global workspace %p.", + workspace->unsafe_get_workspace()); + global_workspace_ = workspace; + } + + return workspace; +} + +Result> +XNNWorkspaceManager::get_or_create_model_workspace(uintptr_t program_id) const { + std::scoped_lock lock(workspace_meta_mutex_); + + // Check for an existing (live) workspace for this program. + auto match = model_workspaces_.find(program_id); + std::shared_ptr workspace = {}; + if (match != model_workspaces_.end()) { + if (auto live_workspace = match->second.lock()) { + workspace = live_workspace; + } + } + + // Allocate a new workspace if needed. + if (!workspace) { + auto create_result = XNNWorkspace::create(); + if (!create_result.ok()) { + return create_result.error(); + } + workspace = create_result.get(); + ET_LOG( + Debug, + "Created workspace %p for program %" PRIuPTR ".", + workspace->unsafe_get_workspace(), + program_id); + model_workspaces_.insert( + {program_id, std::weak_ptr(workspace)}); + } + + return workspace; +} + +} // namespace executorch::backends::xnnpack diff --git a/backends/xnnpack/runtime/XNNWorkspaceManager.h b/backends/xnnpack/runtime/XNNWorkspaceManager.h new file mode 100644 index 00000000000..52db1184bbd --- /dev/null +++ b/backends/xnnpack/runtime/XNNWorkspaceManager.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace executorch::backends::xnnpack { + +/** + * XNNWorkspaceManager manages XNNPACK workspaces based on the configured + * workspace sharing mode. + * + * It supports three modes: + * - Disabled: Each delegate instance gets its own workspace + * - PerModel: All delegate instances in a model share a workspace + * - Global: All delegate instances across all models share a workspace + */ +class XNNWorkspaceManager { + public: + XNNWorkspaceManager(); + ~XNNWorkspaceManager() = default; + + /** + * Set the workspace sharing mode. + * + * @param mode The workspace sharing mode to set. + * @return Error::Ok if the mode was set successfully. + */ + runtime::Error set_sharing_mode(WorkspaceSharingMode mode); + + /** + * Get the current workspace sharing mode. + * + * @return The current workspace sharing mode. + */ + WorkspaceSharingMode get_sharing_mode() const; + + /** + * Retrieve a workspace for the given program ID, depending on the sharing + * mode. A workspace will be created if needed. + * + * @param program_id The ID of the program requesting a workspace. + * @return A Result containing a shared_ptr to the workspace, or an error. + */ + runtime::Result> get_or_create_workspace( + uintptr_t program_id) const; + + private: + // The active sharing mode. Changes to this affect only models loaded after + // the change. + std::atomic sharing_mode_; + + // A mutex guarding global_workspace_ and model_workspaces_. Note that this + // mutex only guards the top-level definitions, not the contents of the + // workspace. The contents of the workspace are guarded by the workspace's own + // mutex in the XNNWorkspace class. + mutable std::mutex workspace_meta_mutex_; + + // A global workspace for all delegate instances, if global sharing is + // enabled. Lazy initialized. Stored as a weak pointer to allow automatic + // cleanup when all references are released. 
+ mutable std::weak_ptr global_workspace_; + + // A map from program id to workspace for delegate instances, if per model + // sharing is enabled. Workspaces are owned by the executor instances via + // shared_ptr. They are tracked here via weak pointers to allow automatic + // cleanup when the executors are destroyed while being retrievable when + // instantiating new executors. + mutable std::unordered_map> + model_workspaces_; + + // Retrieve the global workspace, lazy initializing it if needed. + runtime::Result> + get_or_create_global_workspace() const; + + // Get or create a workspace for the given program ID. + runtime::Result> get_or_create_model_workspace( + uintptr_t program_id) const; +}; + +} // namespace executorch::backends::xnnpack diff --git a/backends/xnnpack/targets.bzl b/backends/xnnpack/targets.bzl index 0eab89a00f9..623ee278803 100644 --- a/backends/xnnpack/targets.bzl +++ b/backends/xnnpack/targets.bzl @@ -59,6 +59,9 @@ def define_common_targets(): exported_deps = [ "//executorch/runtime/backend:interface" + aten_suffix, ], + exported_headers = [ + "runtime/XNNPACKBackend.h", + ], deps = [ third_party_dep("XNNPACK"), "//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header", diff --git a/backends/xnnpack/test/runtime/test_workspace_manager.cpp b/backends/xnnpack/test/runtime/test_workspace_manager.cpp new file mode 100644 index 00000000000..ddb7074a1ce --- /dev/null +++ b/backends/xnnpack/test/runtime/test_workspace_manager.cpp @@ -0,0 +1,280 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include +#include +#include + +#include + +using namespace ::testing; + +using executorch::backends::xnnpack::WorkspaceSharingMode; +using executorch::backends::xnnpack::XNNWorkspace; +using executorch::backends::xnnpack::XNNWorkspaceManager; +using executorch::runtime::Error; +using executorch::runtime::Result; + +class XNNWorkspaceManagerTest : public ::testing::Test { + protected: + void SetUp() override { + // Log calls will abort if PAL is not initialized. + executorch::runtime::runtime_init(); + + // Initialize a new workspace manager for each test. + workspace_manager_ = std::make_unique(); + } + + std::unique_ptr workspace_manager_; +}; + +TEST_F(XNNWorkspaceManagerTest, SetAndGetSharingMode) { + // Test setting and getting the sharing mode + EXPECT_EQ( + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Disabled), + Error::Ok); + EXPECT_EQ( + workspace_manager_->get_sharing_mode(), WorkspaceSharingMode::Disabled); + + EXPECT_EQ( + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::PerModel), + Error::Ok); + EXPECT_EQ( + workspace_manager_->get_sharing_mode(), WorkspaceSharingMode::PerModel); + + EXPECT_EQ( + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Global), + Error::Ok); + EXPECT_EQ( + workspace_manager_->get_sharing_mode(), WorkspaceSharingMode::Global); +} + +TEST_F(XNNWorkspaceManagerTest, SetInvalidSharingMode) { + // First set a valid mode to ensure we're starting from a known state. + EXPECT_EQ( + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Disabled), + Error::Ok); + EXPECT_EQ( + workspace_manager_->get_sharing_mode(), WorkspaceSharingMode::Disabled); + + // Try to set an invalid mode. + WorkspaceSharingMode invalid_mode = static_cast(70); + EXPECT_EQ( + workspace_manager_->set_sharing_mode(invalid_mode), + Error::InvalidArgument); + + // The mode should not have changed. 
+ EXPECT_EQ( + workspace_manager_->get_sharing_mode(), WorkspaceSharingMode::Disabled); +} + +TEST_F(XNNWorkspaceManagerTest, DisabledMode) { + // Verify that each call retrieves a new workspace when sharing is disabled. + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Disabled); + + uintptr_t program_id = 12345; + auto workspace1_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace1_result.ok()); + auto workspace1 = workspace1_result.get(); + + auto workspace2_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace2_result.ok()); + auto workspace2 = workspace2_result.get(); + + auto workspace3_result = + workspace_manager_->get_or_create_workspace(program_id + 1); + ASSERT_TRUE(workspace3_result.ok()); + auto workspace3 = workspace3_result.get(); + + EXPECT_NE(workspace1, workspace2); + EXPECT_NE(workspace1, workspace3); + EXPECT_NE(workspace2, workspace3); + EXPECT_NE( + workspace1->unsafe_get_workspace(), workspace2->unsafe_get_workspace()); + EXPECT_NE( + workspace1->unsafe_get_workspace(), workspace3->unsafe_get_workspace()); + EXPECT_NE( + workspace2->unsafe_get_workspace(), workspace3->unsafe_get_workspace()); +} + +TEST_F(XNNWorkspaceManagerTest, PerModelMode) { + // In PerModel mode, calls with the same program_id should return the same + // workspace. + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::PerModel); + + // Get two workspaces with the same program ID and one different. 
+ uintptr_t program_id = 12345; + auto workspace1_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace1_result.ok()); + auto workspace1 = workspace1_result.get(); + + auto workspace2_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace2_result.ok()); + auto workspace2 = workspace2_result.get(); + + auto workspace3_result = + workspace_manager_->get_or_create_workspace(program_id + 1); + ASSERT_TRUE(workspace3_result.ok()); + auto workspace3 = workspace3_result.get(); + + // Workspace 1 and 2 should be the same, but different from workspace 3. + EXPECT_EQ(workspace1, workspace2); + EXPECT_EQ( + workspace1->unsafe_get_workspace(), workspace2->unsafe_get_workspace()); + + EXPECT_NE(workspace1, workspace3); + EXPECT_NE( + workspace1->unsafe_get_workspace(), workspace3->unsafe_get_workspace()); +} + +TEST_F(XNNWorkspaceManagerTest, GlobalMode) { + // In Global mode, all calls should return the same workspace. + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Global); + + // Get workspaces with different program IDs + uintptr_t program_id1 = 12345; + auto workspace1_result = + workspace_manager_->get_or_create_workspace(program_id1); + ASSERT_TRUE(workspace1_result.ok()); + auto workspace1 = workspace1_result.get(); + + uintptr_t program_id2 = 67890; + auto workspace2_result = + workspace_manager_->get_or_create_workspace(program_id2); + ASSERT_TRUE(workspace2_result.ok()); + auto workspace2 = workspace2_result.get(); + + EXPECT_EQ(workspace1, workspace2); + EXPECT_EQ( + workspace1->unsafe_get_workspace(), workspace2->unsafe_get_workspace()); +} + +TEST_F(XNNWorkspaceManagerTest, PerModelModeCleanup) { + // Test that workspaces are properly cleaned up when shared_ptr is destroyed + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::PerModel); + + uintptr_t program_id = 12345; + xnn_workspace_t raw_workspace1 = nullptr; + + // Create a scope to control the lifetime of 
workspace1 + { + auto workspace1_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace1_result.ok()); + auto workspace1 = workspace1_result.get(); + + // Store the raw pointer for later comparison + raw_workspace1 = workspace1->unsafe_get_workspace(); + + // Let workspace1 go out of scope and be destroyed + } + + // Get a new workspace with the same program ID + auto workspace2_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace2_result.ok()); + auto workspace2 = workspace2_result.get(); + + // Since the previous workspace was destroyed, we should get a new one. + EXPECT_NE(workspace2->unsafe_get_workspace(), raw_workspace1); +} + +TEST_F(XNNWorkspaceManagerTest, GlobalModeCleanup) { + // Test that global workspaces are properly cleaned up when all users + // are destroyed. + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Global); + + uintptr_t program_id = 12345; + xnn_workspace_t raw_workspace1 = nullptr; + + // Create a scope to control the lifetime of workspace1 + { + auto workspace1_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace1_result.ok()); + auto workspace1 = workspace1_result.get(); + + // Store the raw pointer for later comparison + raw_workspace1 = workspace1->unsafe_get_workspace(); + + // Let workspace1 go out of scope and be destroyed + } + + // Get a new workspace (program ID doesn't matter in Global mode) + auto workspace2_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace2_result.ok()); + auto workspace2 = workspace2_result.get(); + + // Since the previous workspace was destroyed, we should get a new one. 
+ EXPECT_NE(workspace2->unsafe_get_workspace(), raw_workspace1); +} + +TEST_F(XNNWorkspaceManagerTest, SwitchingModes) { + // Test switching between different sharing modes + + // Start with Disabled mode + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Disabled); + + // Get a workspace + uintptr_t program_id = 12345; + auto workspace1_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace1_result.ok()); + auto workspace1 = workspace1_result.get(); + + // Switch to PerModel mode + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::PerModel); + + // Get another workspace with the same program ID + auto workspace2_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace2_result.ok()); + auto workspace2 = workspace2_result.get(); + + // Should be a different workspace + EXPECT_NE(workspace1, workspace2); + + // Get another workspace with the same program ID in PerModel mode + auto workspace3_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace3_result.ok()); + auto workspace3 = workspace3_result.get(); + + // Should be the same workspace as workspace2 + EXPECT_EQ(workspace2, workspace3); + + // Switch to Global mode + workspace_manager_->set_sharing_mode(WorkspaceSharingMode::Global); + + // Get another workspace + auto workspace4_result = + workspace_manager_->get_or_create_workspace(program_id); + ASSERT_TRUE(workspace4_result.ok()); + auto workspace4 = workspace4_result.get(); + + // Should be a different workspace since we switched modes + EXPECT_NE(workspace3, workspace4); + + // Get a workspace with a different program ID in Global mode + uintptr_t different_program_id = 67890; + auto workspace5_result = + workspace_manager_->get_or_create_workspace(different_program_id); + ASSERT_TRUE(workspace5_result.ok()); + auto workspace5 = workspace5_result.get(); + + // Should be the same workspace as workspace4 + EXPECT_EQ(workspace4, 
workspace5); +} diff --git a/backends/xnnpack/test/runtime/test_workspace_sharing.cpp b/backends/xnnpack/test/runtime/test_workspace_sharing.cpp new file mode 100644 index 00000000000..66f0d012acd --- /dev/null +++ b/backends/xnnpack/test/runtime/test_workspace_sharing.cpp @@ -0,0 +1,179 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include + +using namespace ::testing; + +using executorch::backends::xnnpack::workspace_sharing_mode_option_key; +using executorch::backends::xnnpack::WorkspaceSharingMode; +using executorch::backends::xnnpack::xnnpack_backend_key; +using executorch::extension::Module; +using executorch::extension::TensorPtr; +using executorch::runtime::BackendOption; +using executorch::runtime::BackendOptions; +using executorch::runtime::Error; + +TensorPtr create_input_tensor(float val); +void run_and_validate_two_models( + std::optional mode1 = std::nullopt, + std::optional mode2 = std::nullopt); +void set_and_check_workspace_sharing_mode(WorkspaceSharingMode mode); + +TEST(WorkspaceSharing, SetMode) { + // Try setting and reading back the mode a few times. + set_and_check_workspace_sharing_mode(WorkspaceSharingMode::Disabled); + set_and_check_workspace_sharing_mode(WorkspaceSharingMode::PerModel); + set_and_check_workspace_sharing_mode(WorkspaceSharingMode::Global); +} + +TEST(WorkspaceSharing, SetInvalidMode) { + // Make sure we can't set an invalid mode. + + // Set to an initial known value. + set_and_check_workspace_sharing_mode(WorkspaceSharingMode::PerModel); + + // Set to a bad value. 
+ BackendOptions<1> backend_options; + backend_options.set_option(workspace_sharing_mode_option_key, 70); + + auto status = executorch::runtime::set_option( + xnnpack_backend_key, backend_options.view()); + ASSERT_EQ(status, Error::InvalidArgument); + + // Make sure the option is still set to a valid value. + BackendOption read_option; + strcpy(read_option.key, workspace_sharing_mode_option_key); + read_option.value = -1; + status = get_option(xnnpack_backend_key, read_option); + + ASSERT_TRUE( + std::get(read_option.value) == + static_cast(WorkspaceSharingMode::PerModel)); +} + +TEST(WorkspaceSharing, RunWithDisabledMode) { + // Load and run some PTEs with workspace sharing disabled. + run_and_validate_two_models(WorkspaceSharingMode::Disabled); +} + +TEST(WorkspaceSharing, RunWithPerModelMode) { + // Load and run some PTEs with per-model workspace sharing. + run_and_validate_two_models(WorkspaceSharingMode::PerModel); +} + +TEST(WorkspaceSharing, RunWithGlobalMode) { + // Load and run some PTEs with global workspace sharing. + run_and_validate_two_models(WorkspaceSharingMode::Global); +} + +TEST(WorkspaceSharing, RunWithModeSwitch) { + // Check each pair of modes, loading one model in one mode and the other in + // the other mode. + + std::array modes = { + WorkspaceSharingMode::Disabled, + WorkspaceSharingMode::PerModel, + WorkspaceSharingMode::Global}; + + for (auto i = 0; i < modes.size(); ++i) { + for (auto j = i + 1; j < modes.size(); ++j) { + run_and_validate_two_models(modes[i], modes[j]); + } + } +} + +TensorPtr create_input_tensor(float val) { + // Create an f32 tensor with shape [10, 10, 10], matching the input of the + // test models. + std::vector data(1000, val); + + // Note that the tensor pointer takes ownership of the data vector. 
+ return executorch::extension::make_tensor_ptr({10, 10, 10}, std::move(data)); +} + +void run_and_validate_two_models( + std::optional mode1, + std::optional mode2) { + // Load and run two models, verifying that the output tensors are correct, + // optionally setting sharing mode. + + if (mode1) { + set_and_check_workspace_sharing_mode(*mode1); + } + + Module mod1(std::getenv("ET_XNNPACK_GENERATED_ADD_LARGE_PTE_PATH")); + + auto a = create_input_tensor(1.0); + auto b = create_input_tensor(2.0); + auto c = create_input_tensor(3.0); + + auto result = mod1.forward({a, b, c}); + EXPECT_TRUE(result.ok()); + + // Expected output is 2a + 2b + c. + auto output_val = 1.0 * 2 + 2.0 * 2 + 3.0; + auto& output_tensor = result.get()[0].toTensor(); + for (auto i = 0; i < output_tensor.numel(); ++i) { + ASSERT_EQ(output_tensor.const_data_ptr()[i], output_val); + } + + if (mode2) { + set_and_check_workspace_sharing_mode(*mode2); + } + + Module mod2(std::getenv("ET_XNNPACK_GENERATED_SUB_LARGE_PTE_PATH")); + + auto result2 = mod2.forward({a, b, c}); + EXPECT_TRUE(result2.ok()); + + // Expected output is zero (the subtract operations cancel out). 
+ auto& output_tensor2 = result2.get()[0].toTensor(); + for (auto i = 0; i < output_tensor2.numel(); ++i) { + ASSERT_EQ(output_tensor2.const_data_ptr()[i], 0); + } + + // Run mod1 again to validate that it gives correct results in the second mode + auto result3 = mod1.forward({a, b, c}); + EXPECT_TRUE(result3.ok()); + + // Expected output is still 2a + 2b + c + auto& output_tensor3 = result3.get()[0].toTensor(); + for (auto i = 0; i < output_tensor3.numel(); ++i) { + ASSERT_EQ(output_tensor3.const_data_ptr()[i], output_val); + } +} + +void set_and_check_workspace_sharing_mode(WorkspaceSharingMode mode) { + executorch::runtime::runtime_init(); + + BackendOptions<1> backend_options; + backend_options.set_option( + workspace_sharing_mode_option_key, static_cast(mode)); + + auto status = executorch::runtime::set_option( + xnnpack_backend_key, backend_options.view()); + ASSERT_EQ(status, Error::Ok); + + // Read the option back to sanity check. + BackendOption read_option; + strcpy(read_option.key, workspace_sharing_mode_option_key); + read_option.value = -1; + status = get_option(xnnpack_backend_key, read_option); + + ASSERT_TRUE(std::get(read_option.value) == static_cast(mode)); +} diff --git a/backends/xnnpack/test/runtime/test_xnnexecutor.cpp b/backends/xnnpack/test/runtime/test_xnnexecutor.cpp index b2a56f6283d..568c3c4ec35 100644 --- a/backends/xnnpack/test/runtime/test_xnnexecutor.cpp +++ b/backends/xnnpack/test/runtime/test_xnnexecutor.cpp @@ -18,7 +18,7 @@ using executorch::runtime::Span; using executorch::runtime::testing::TensorFactory; TEST(XNNExecutorTest, ArgumentWithTooManyDimensions) { - XNNExecutor executor; + XNNExecutor executor({}); xnn_subgraph_t subgraph = nullptr; xnn_runtime_t rt = nullptr; et_pal_init(); diff --git a/backends/xnnpack/test/targets.bzl b/backends/xnnpack/test/targets.bzl index f175e9655ea..04517c035fe 100644 --- a/backends/xnnpack/test/targets.bzl +++ b/backends/xnnpack/test/targets.bzl @@ -63,3 +63,26 @@ def 
define_common_targets(): "ET_MODULE_LINEAR_XNN_DATA_PATH": "$(location fbcode//executorch/test/models:exported_xnnpack_program_and_data[ModuleLinear.ptd])", }, ) + + runtime.cxx_test( + name = "test_workspace_sharing", + srcs = ["runtime/test_workspace_sharing.cpp"], + deps = [ + "//executorch/extension/module:module", + "//executorch/extension/tensor:tensor", + "//executorch/backends/xnnpack:xnnpack_backend", + ], + env = { + "ET_XNNPACK_GENERATED_ADD_LARGE_PTE_PATH": "$(location fbcode//executorch/test/models:exported_xnnp_delegated_programs[ModuleAddLarge.pte])", + "ET_XNNPACK_GENERATED_SUB_LARGE_PTE_PATH": "$(location fbcode//executorch/test/models:exported_xnnp_delegated_programs[ModuleSubLarge.pte])", + }, + ) + + runtime.cxx_test( + name = "test_workspace_manager", + srcs = ["runtime/test_workspace_manager.cpp"], + deps = [ + third_party_dep("XNNPACK"), + "//executorch/backends/xnnpack:xnnpack_backend", + ], + ) diff --git a/shim_et/xplat/executorch/build/build_variables.bzl b/shim_et/xplat/executorch/build/build_variables.bzl index 96cffb96e00..ea086886449 100644 --- a/shim_et/xplat/executorch/build/build_variables.bzl +++ b/shim_et/xplat/executorch/build/build_variables.bzl @@ -465,6 +465,7 @@ XNNPACK_BACKEND_BUCK_SRCS = [ "runtime/XNNHeader.cpp", "runtime/XNNPACKBackend.cpp", "runtime/XNNWeightsCache.cpp", + "runtime/XNNWorkspaceManager.cpp", "runtime/profiling/XNNProfiler.cpp", ]