
Commit b1ce3e9

GregoryComer authored and facebook-github-bot committed
Add XNNPACK backend option for workspace sharing (re-land) (#13934)
Summary:

**Note: This is a re-land, fixing a use-after-free which occurred when destroying a delegate instance. The executor is destroyed, which frees the workspace. The mutex that raii_lock points to is owned by the workspace, so there is a use-after-free when raii_lock goes out of scope. This is fixed by taking an owning reference to the workspace in destroy.**

Add a backend option for XNNPACK to enable runtime control of workspace sharing. I've added three mode options: Disabled, PerModel, and Global. PerModel shares the workspace between all CALL_DELEGATE instances in a model, keyed by memory allocator address (see below). Global uses a single workspace instance.

I've written the code to allow the active workspace mode to be safely changed at any time. The workspace instance is resolved at delegate instance init time (model load) and is stored in the XNNExecutor instance. This design will also allow us to set per-model sharing options in the future. I've introduced a wrapper class (XNNWorkspace) to help with synchronization.

With regard to the PerModel behavior, I am using the address of the runtime allocator to disambiguate the model. This is not ideal in the long run, but there is a larger discussion around generating IDs in a coherent manner in multithreaded environments without synchronization in the core runtime. This might require PAL changes (exposing a thread ID, for example), so I intend to come back to this. It should be possible to transparently update this logic in the future. The program ID can collide or change without affecting correctness, but collisions may increase memory use, and instability between delegate instances in a method may force extra synchronization.

I'd like to add a PerMethod mode as a follow-up. This should be keyed to the specific method instance (not name), such that multiple method instances for the same method can be loaded for execution on different threads without forcing synchronization, while still allowing sharing between call delegate instances in each method instance. This will require a unique method identifier.

Reviewed By: digantdesai

Differential Revision: D81647105
1 parent 07d1092 commit b1ce3e9

File tree

14 files changed (+999, −81 lines)


backends/test/multi_method_delegate_test.cpp

Lines changed: 61 additions & 25 deletions
@@ -5,13 +5,22 @@
 #include <thread>
 #include <vector>
 
+#include <executorch/backends/xnnpack/runtime/XNNPACKBackend.h>
+
+#include <executorch/runtime/backend/interface.h>
+#include <executorch/runtime/backend/options.h>
 #include <executorch/runtime/executor/program.h>
 #include <executorch/runtime/platform/runtime.h>
 
 #include <executorch/extension/data_loader/file_data_loader.h>
 #include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
 #include <executorch/extension/runner_util/inputs.h>
 
+using executorch::backends::xnnpack::workspace_sharing_mode_option_key;
+using executorch::backends::xnnpack::WorkspaceSharingMode;
+using executorch::backends::xnnpack::xnnpack_backend_key;
+
+using executorch::runtime::BackendOptions;
 using executorch::runtime::Error;
 using executorch::runtime::EValue;
 using executorch::runtime::HierarchicalAllocator;
@@ -126,34 +135,61 @@ class XNNPACKMultiDelegateTest : public ETPTEMethodRunBaseTest {
     num_threads = 40;
     kMethodName = "forward";
   }
-};
 
-// This test is to validate the assumption that the delegate is thread safe.
-// That includes the following:
-// 1. The delegate can be initilized by multiple threads in parallel.
-// 2. The delegate can be executed by multiple threads in parallel.
-// 3. The delegate can be destroyed by multiple threads in parallel.
-// Regardless of the underlying implementation of the delegate.
-// This is particularly important when we have shared resources across
-// delegate instances through a singleton backend instance.
-TEST_F(XNNPACKMultiDelegateTest, MultipleThreads) {
-  ASSERT_NE(kTestPTE1Path.size(), 0);
-  ASSERT_NE(kTestPTE2Path.size(), 0);
-  ASSERT_NE(num_threads, 0);
-  ASSERT_NE(kMethodName.size(), 0);
-
-  std::vector<std::thread> threads(num_threads);
-  std::atomic<size_t> count{0};
-
-  for (int i = 0; i < num_threads; i++) {
-    threads[i] = std::thread([&, i]() {
-      run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
-    });
+  // This test is to validate the assumption that the delegate is thread safe.
+  // That includes the following:
+  // 1. The delegate can be initilized by multiple threads in parallel.
+  // 2. The delegate can be executed by multiple threads in parallel.
+  // 3. The delegate can be destroyed by multiple threads in parallel.
+  // Regardless of the underlying implementation of the delegate.
+  // This is particularly important when we have shared resources across
+  // delegate instances through a singleton backend instance.
+  void runStressTest() {
+    ASSERT_NE(kTestPTE1Path.size(), 0);
+    ASSERT_NE(kTestPTE2Path.size(), 0);
+    ASSERT_NE(num_threads, 0);
+    ASSERT_NE(kMethodName.size(), 0);
+
+    std::vector<std::thread> threads(num_threads);
+    std::atomic<size_t> count{0};
+
+    for (int i = 0; i < num_threads; i++) {
+      threads[i] = std::thread([&, i]() {
+        run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
+      });
+    }
+    for (int i = 0; i < num_threads; i++) {
+      threads[i].join();
+    }
+    ASSERT_EQ(count, num_threads);
   }
-  for (int i = 0; i < num_threads; i++) {
-    threads[i].join();
+
+  void setWorkspaceSharingMode(WorkspaceSharingMode mode) {
+    executorch::runtime::runtime_init();
+
+    BackendOptions<1> backend_options;
+    backend_options.set_option(
+        workspace_sharing_mode_option_key, static_cast<int>(mode));
+
+    auto status = executorch::runtime::set_option(
+        xnnpack_backend_key, backend_options.view());
+    ASSERT_EQ(status, Error::Ok);
   }
-  ASSERT_EQ(count, num_threads);
+};
+
+TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsSharingDisabled) {
+  setWorkspaceSharingMode(WorkspaceSharingMode::Disabled);
+  runStressTest();
+}
+
+TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsPerModelSharing) {
+  setWorkspaceSharingMode(WorkspaceSharingMode::PerModel);
+  runStressTest();
+}
+
+TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsGlobalSharing) {
+  setWorkspaceSharingMode(WorkspaceSharingMode::Global);
+  runStressTest();
 }
 
 // TODO(T208989291): Add more tests here. For example,

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 3 additions & 11 deletions
@@ -1895,24 +1895,16 @@ ET_NODISCARD Error XNNCompiler::compileModel(
   xnn_weights_cache_t weights_cache_ptr = nullptr;
 #endif
 
-#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
-  ET_CHECK_OR_RETURN_ERROR(
-      workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace");
+  // NOLINTBEGIN(facebook-hte-NullableDereference) - weights cache is allowed to
+  // be null
   status = xnn_create_runtime_v4(
       subgraph.get(),
       weights_cache_ptr,
       workspace,
       ::executorch::extension::threadpool::get_pthreadpool(),
       runtime_flags,
       &runtime_ptr);
-#else
-  status = xnn_create_runtime_v3(
-      subgraph.get(),
-      weights_cache_ptr,
-      ::executorch::extension::threadpool::get_pthreadpool(),
-      runtime_flags,
-      &runtime_ptr);
-#endif
+  // NOLINTEND(facebook-hte-NullableDereference)
 
   ET_CHECK_OR_RETURN_ERROR(
       xnn_status_success == status,

backends/xnnpack/runtime/XNNExecutor.h

Lines changed: 8 additions & 2 deletions
@@ -9,13 +9,13 @@
 #pragma once
 
 #include <executorch/backends/xnnpack/runtime/XNNStatus.h>
+#include <executorch/backends/xnnpack/runtime/XNNWorkspace.h>
 #include <executorch/backends/xnnpack/runtime/profiling/XNNProfiler.h>
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/exec_aten/util/tensor_util.h>
 
 #include <xnnpack.h>
-#include <map>
 #include <memory>
 #include <vector>
 
@@ -35,9 +35,11 @@ class XNNExecutor {
   std::vector<uint32_t> output_ids_;
   std::vector<xnn_external_value> externals_;
   std::vector<std::string> packed_data_names_;
+  std::shared_ptr<XNNWorkspace> workspace_;
 
  public:
-  XNNExecutor() = default;
+  XNNExecutor(std::shared_ptr<XNNWorkspace> workspace)
+      : workspace_(workspace) {}
 
   inline size_t getNumInputs() {
     return input_ids_.size();
@@ -51,6 +53,10 @@ class XNNExecutor {
     return packed_data_names_;
   }
 
+  inline std::shared_ptr<XNNWorkspace> get_workspace() {
+    return workspace_;
+  }
+
   /**
    * Initialize the XNNExecutor with a given runtime and input/output ids.
    * The input/output ids are expected to be sorted in order of their
