Skip to content

Commit 7f0723e

Browse files
committed
Reapply "Add EXECUTORCH_THREADPOOL_SIZE options, default to u… (#14307)
This reverts commit 750cba7.
1 parent f174974 commit 7f0723e

File tree

7 files changed

+110
-2
lines changed

7 files changed

+110
-2
lines changed

CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,18 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
266266
executorch_move_interface_include_directories_to_build_time_only(
267267
pthreadpool_interface
268268
)
269+
270+
if(APPLE)
271+
# Use hidden visibility for pthreadpool on Apple platforms to avoid issues
272+
# with pthreadpool symbols from libtorch_cpu taking precedence over the ones
273+
# from the pthreadpool library statically linked in _portable_lib. The
274+
# pthreadpool public APIs are marked as weak by default on some Apple
275+
# platforms, so setting to hidden visibility works around this by not
276+
# putting the symbol in the indirection table. See
277+
# https://github.com/pytorch/executorch/issues/14321 for more details.
278+
target_compile_options(pthreadpool PRIVATE -fvisibility=hidden)
279+
endif()
280+
269281
install(
270282
TARGETS pthreadpool pthreadpool_interface fxdiv
271283
EXPORT ExecuTorchTargets

extension/threadpool/CMakeLists.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ if(NOT CMAKE_CXX_STANDARD)
2020
set(CMAKE_CXX_STANDARD 17)
2121
endif()
2222

23+
# Threadpool size specifiers. Mutual exclusion is checking in default.cmake.
24+
# Default to using performance cores if
25+
# EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES isn't set.
26+
set(_threadpool_size_flag)
27+
if(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES)
28+
set(_threadpool_size_flag "EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES")
29+
else()
30+
set(_threadpool_size_flag "EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES")
31+
endif()
32+
2333
add_library(
2434
extension_threadpool threadpool.cpp threadpool_guard.cpp thread_parallel.cpp
2535
cpuinfo_utils.cpp
@@ -36,7 +46,9 @@ target_include_directories(
3646
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include>
3747
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include>
3848
)
39-
target_compile_definitions(extension_threadpool PUBLIC ET_USE_THREADPOOL)
49+
target_compile_definitions(
50+
extension_threadpool PUBLIC ET_USE_THREADPOOL ${_threadpool_size_flag}
51+
)
4052
target_compile_options(extension_threadpool PUBLIC ${_common_compile_options})
4153

4254
# Install libraries

extension/threadpool/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def define_common_targets():
2222
name = "threadpool_lib",
2323
srcs = _THREADPOOL_SRCS,
2424
deps = [
25+
":cpuinfo_utils",
2526
"//executorch/runtime/core:core",
2627
"//executorch/runtime/core/portable_type/c10/c10:c10",
2728
],

extension/threadpool/test/threadpool_test.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include <executorch/extension/threadpool/threadpool.h>
10+
#include <executorch/runtime/platform/runtime.h>
1011

1112
#include <mutex>
1213
#include <numeric>
@@ -71,6 +72,8 @@ void run_lambda_with_size(
7172
} // namespace
7273

7374
TEST(ThreadPoolTest, ParallelAdd) {
75+
executorch::runtime::runtime_init();
76+
7477
std::vector<int32_t> a, b, c, c_ref;
7578
size_t vector_size = 100;
7679
size_t grain_size = 10;
@@ -111,6 +114,8 @@ TEST(ThreadPoolTest, ParallelAdd) {
111114

112115
// Test parallel reduction where we acquire lock within lambda
113116
TEST(ThreadPoolTest, ParallelReduce) {
117+
executorch::runtime::runtime_init();
118+
114119
std::vector<int32_t> a;
115120
int32_t c = 0, c_ref = 0;
116121
size_t vector_size = 100;
@@ -144,6 +149,8 @@ TEST(ThreadPoolTest, ParallelReduce) {
144149
// Copied from
145150
// caffe2/aten/src/ATen/test/test_thread_pool_guard.cp
146151
TEST(TestNoThreadPoolGuard, TestThreadPoolGuard) {
152+
executorch::runtime::runtime_init();
153+
147154
auto threadpool_ptr = ::executorch::extension::threadpool::get_pthreadpool();
148155

149156
ASSERT_NE(threadpool_ptr, nullptr);
@@ -173,6 +180,8 @@ TEST(TestNoThreadPoolGuard, TestThreadPoolGuard) {
173180
}
174181

175182
TEST(TestNoThreadPoolGuard, TestRunWithGuard) {
183+
executorch::runtime::runtime_init();
184+
176185
const std::vector<int64_t> array = {1, 2, 3};
177186

178187
auto pool = ::executorch::extension::threadpool::get_threadpool();

extension/threadpool/threadpool.cpp

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,34 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <executorch/extension/threadpool/cpuinfo_utils.h>
910
#include <executorch/extension/threadpool/threadpool.h>
1011

1112
#include <algorithm>
1213
#include <memory>
1314

1415
#include <executorch/extension/threadpool/threadpool_guard.h>
1516
#include <executorch/runtime/platform/assert.h>
17+
#include <executorch/runtime/platform/runtime.h>
1618

1719
#include <cpuinfo.h>
1820

21+
// At most one mode should be set.
22+
#if ( \
23+
defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES) && \
24+
defined(EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES))
25+
#error Multiple \
26+
threadpool size specifiers are set.At most one of \
27+
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES, \
28+
and EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES may be defined.
29+
#endif
30+
31+
// Default to EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES if no mode is set.
32+
#if !defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES) && \
33+
!defined(EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES)
34+
#define EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES 1
35+
#endif
36+
1937
namespace executorch::extension::threadpool {
2038

2139
#if !(defined(WIN32))
@@ -95,13 +113,22 @@ void ThreadPool::run(
95113
// get_threadpool is not thread safe due to leak_corrupted_threadpool
96114
// Make this part threadsafe: TODO(kimishpatel)
97115
ThreadPool* get_threadpool() {
116+
executorch::runtime::runtime_init();
117+
98118
if (!cpuinfo_initialize()) {
99119
ET_LOG(Error, "cpuinfo initialization failed");
100120
return nullptr; // NOLINT(facebook-hte-NullableReturn)
101121
}
102122

103123
static const int num_threads = ([]() {
104-
int result = cpuinfo_get_processors_count();
124+
#if defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES)
125+
// Use threads=cores.
126+
auto result = cpuinfo_get_processors_count();
127+
#else
128+
// Set threads equal to the number of performance cores.
129+
auto result =
130+
::executorch::extension::cpuinfo::get_num_performant_cores();
131+
#endif
105132

106133
/*
107134
* For llvm-tsan, holding limit for the number of locks for a single thread
@@ -114,6 +141,7 @@ ThreadPool* get_threadpool() {
114141
constexpr int tsan_thread_limit = 63;
115142
return std::min(result, tsan_thread_limit);
116143
})();
144+
117145
static auto threadpool = std::make_unique<ThreadPool>(num_threads);
118146

119147
// Inheriting from old threadpool to get around segfault issue

extension/threadpool/threadpool.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,22 @@
1414

1515
#include <pthreadpool.h>
1616

17+
/*
18+
* Threadpool Options:
19+
*
20+
* Threadpool size has a sizble affect on performance. By default, the
21+
* threadpool will be sized according to the number of performance cores. This
22+
* behavior can be overriden with the following build-time options. Note that
23+
* these options are mutually exclusive.
24+
*
25+
* - EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES (flag) - Sizes the threadpool
26+
* equal to the number of performance cores on the system. This is the default
27+
* behavior.
28+
* - EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES (flag) - Sizes the threadpool
29+
* equal to the number of logical cores on system. This is the historical
30+
* behavior.
31+
*/
32+
1733
namespace executorch::extension::threadpool {
1834

1935
class ThreadPool final {

tools/cmake/preset/default.cmake

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,36 @@ define_overridable_option(
176176
${_default_executorch_build_cpuinfo}
177177
)
178178

179+
# Threadpool size options. At most one can be specified. Note that the default
180+
# is managed in threadpool.cpp to allow the user to specify an alternate mode
181+
# without needing to explicitly set the default to off.
182+
define_overridable_option(
183+
EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES
184+
"Set the number of threads used for CPU parallel computation equal to the number of performant CPU cores."
185+
BOOL
186+
OFF
187+
)
188+
define_overridable_option(
189+
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES
190+
"Set the number of threads used for CPU parallel computation equal to the number of logical CPU cores."
191+
BOOL
192+
OFF
193+
)
194+
195+
check_required_options_on(
196+
IF_ON EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES REQUIRES
197+
EXECUTORCH_BUILD_PTHREADPOOL EXECUTORCH_BUILD_CPUINFO
198+
)
199+
check_required_options_on(
200+
IF_ON EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES REQUIRES
201+
EXECUTORCH_BUILD_PTHREADPOOL EXECUTORCH_BUILD_CPUINFO
202+
)
203+
204+
check_conflicting_options_on(
205+
IF_ON EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES CONFLICTS_WITH
206+
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES
207+
)
208+
179209
# TODO(jathu): move this to platform specific presets when created
180210
set(_default_executorch_build_executor_runner ON)
181211
if(APPLE AND "${SDK_NAME}" STREQUAL "iphoneos")

0 commit comments

Comments
 (0)