Skip to content

Commit a523306

Browse files
authored
Add XNNPACK backend option for workspace sharing (re-land)
Differential Revision: D81647105 Pull Request resolved: #13934
1 parent fd04fe0 commit a523306

14 files changed

+999
-81
lines changed

backends/test/multi_method_delegate_test.cpp

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,22 @@
55
#include <thread>
66
#include <vector>
77

8+
#include <executorch/backends/xnnpack/runtime/XNNPACKBackend.h>
9+
10+
#include <executorch/runtime/backend/interface.h>
11+
#include <executorch/runtime/backend/options.h>
812
#include <executorch/runtime/executor/program.h>
913
#include <executorch/runtime/platform/runtime.h>
1014

1115
#include <executorch/extension/data_loader/file_data_loader.h>
1216
#include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
1317
#include <executorch/extension/runner_util/inputs.h>
1418

19+
using executorch::backends::xnnpack::workspace_sharing_mode_option_key;
20+
using executorch::backends::xnnpack::WorkspaceSharingMode;
21+
using executorch::backends::xnnpack::xnnpack_backend_key;
22+
23+
using executorch::runtime::BackendOptions;
1524
using executorch::runtime::Error;
1625
using executorch::runtime::EValue;
1726
using executorch::runtime::HierarchicalAllocator;
@@ -126,34 +135,61 @@ class XNNPACKMultiDelegateTest : public ETPTEMethodRunBaseTest {
126135
num_threads = 40;
127136
kMethodName = "forward";
128137
}
129-
};
130138

131-
// This test is to validate the assumption that the delegate is thread safe.
132-
// That includes the following:
133-
// 1. The delegate can be initialized by multiple threads in parallel.
134-
// 2. The delegate can be executed by multiple threads in parallel.
135-
// 3. The delegate can be destroyed by multiple threads in parallel.
136-
// Regardless of the underlying implementation of the delegate.
137-
// This is particularly important when we have shared resources across
138-
// delegate instances through a singleton backend instance.
139-
TEST_F(XNNPACKMultiDelegateTest, MultipleThreads) {
140-
ASSERT_NE(kTestPTE1Path.size(), 0);
141-
ASSERT_NE(kTestPTE2Path.size(), 0);
142-
ASSERT_NE(num_threads, 0);
143-
ASSERT_NE(kMethodName.size(), 0);
144-
145-
std::vector<std::thread> threads(num_threads);
146-
std::atomic<size_t> count{0};
147-
148-
for (int i = 0; i < num_threads; i++) {
149-
threads[i] = std::thread([&, i]() {
150-
run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
151-
});
139+
// This test is to validate the assumption that the delegate is thread safe.
140+
// That includes the following:
141+
// 1. The delegate can be initialized by multiple threads in parallel.
142+
// 2. The delegate can be executed by multiple threads in parallel.
143+
// 3. The delegate can be destroyed by multiple threads in parallel.
144+
// Regardless of the underlying implementation of the delegate.
145+
// This is particularly important when we have shared resources across
146+
// delegate instances through a singleton backend instance.
147+
void runStressTest() {
148+
ASSERT_NE(kTestPTE1Path.size(), 0);
149+
ASSERT_NE(kTestPTE2Path.size(), 0);
150+
ASSERT_NE(num_threads, 0);
151+
ASSERT_NE(kMethodName.size(), 0);
152+
153+
std::vector<std::thread> threads(num_threads);
154+
std::atomic<size_t> count{0};
155+
156+
for (int i = 0; i < num_threads; i++) {
157+
threads[i] = std::thread([&, i]() {
158+
run(i, i % 7 ? kTestPTE1Path : kTestPTE2Path, kMethodName, count);
159+
});
160+
}
161+
for (int i = 0; i < num_threads; i++) {
162+
threads[i].join();
163+
}
164+
ASSERT_EQ(count, num_threads);
152165
}
153-
for (int i = 0; i < num_threads; i++) {
154-
threads[i].join();
166+
167+
void setWorkspaceSharingMode(WorkspaceSharingMode mode) {
168+
executorch::runtime::runtime_init();
169+
170+
BackendOptions<1> backend_options;
171+
backend_options.set_option(
172+
workspace_sharing_mode_option_key, static_cast<int>(mode));
173+
174+
auto status = executorch::runtime::set_option(
175+
xnnpack_backend_key, backend_options.view());
176+
ASSERT_EQ(status, Error::Ok);
155177
}
156-
ASSERT_EQ(count, num_threads);
178+
};
179+
180+
TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsSharingDisabled) {
181+
setWorkspaceSharingMode(WorkspaceSharingMode::Disabled);
182+
runStressTest();
183+
}
184+
185+
TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsPerModelSharing) {
186+
setWorkspaceSharingMode(WorkspaceSharingMode::PerModel);
187+
runStressTest();
188+
}
189+
190+
TEST_F(XNNPACKMultiDelegateTest, MultipleThreadsGlobalSharing) {
191+
setWorkspaceSharingMode(WorkspaceSharingMode::Global);
192+
runStressTest();
157193
}
158194

159195
// TODO(T208989291): Add more tests here. For example,

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1896,24 +1896,16 @@ ET_NODISCARD Error XNNCompiler::compileModel(
18961896
xnn_weights_cache_t weights_cache_ptr = nullptr;
18971897
#endif
18981898

1899-
#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
1900-
ET_CHECK_OR_RETURN_ERROR(
1901-
workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace");
1899+
// NOLINTBEGIN(facebook-hte-NullableDereference) - weights cache is allowed to
1900+
// be null
19021901
status = xnn_create_runtime_v4(
19031902
subgraph.get(),
19041903
weights_cache_ptr,
19051904
workspace,
19061905
::executorch::extension::threadpool::get_pthreadpool(),
19071906
runtime_flags,
19081907
&runtime_ptr);
1909-
#else
1910-
status = xnn_create_runtime_v3(
1911-
subgraph.get(),
1912-
weights_cache_ptr,
1913-
::executorch::extension::threadpool::get_pthreadpool(),
1914-
runtime_flags,
1915-
&runtime_ptr);
1916-
#endif
1908+
// NOLINTEND(facebook-hte-NullableDereference)
19171909

19181910
ET_CHECK_OR_RETURN_ERROR(
19191911
xnn_status_success == status,

backends/xnnpack/runtime/XNNExecutor.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
#pragma once
1010

1111
#include <executorch/backends/xnnpack/runtime/XNNStatus.h>
12+
#include <executorch/backends/xnnpack/runtime/XNNWorkspace.h>
1213
#include <executorch/backends/xnnpack/runtime/profiling/XNNProfiler.h>
1314
#include <executorch/runtime/backend/interface.h>
1415
#include <executorch/runtime/core/error.h>
1516
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
1617

1718
#include <xnnpack.h>
18-
#include <map>
1919
#include <memory>
2020
#include <vector>
2121

@@ -35,9 +35,11 @@ class XNNExecutor {
3535
std::vector<uint32_t> output_ids_;
3636
std::vector<xnn_external_value> externals_;
3737
std::vector<std::string> packed_data_names_;
38+
std::shared_ptr<XNNWorkspace> workspace_;
3839

3940
public:
40-
XNNExecutor() = default;
41+
XNNExecutor(std::shared_ptr<XNNWorkspace> workspace)
42+
: workspace_(workspace) {}
4143

4244
inline size_t getNumInputs() {
4345
return input_ids_.size();
@@ -51,6 +53,10 @@ class XNNExecutor {
5153
return packed_data_names_;
5254
}
5355

56+
inline std::shared_ptr<XNNWorkspace> get_workspace() {
57+
return workspace_;
58+
}
59+
5460
/**
5561
* Initialize the XNNExecutor with a given runtime and input/output ids.
5662
* The input/output ids are expected to be sorted in order of their

0 commit comments

Comments
 (0)