Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion extension/threadpool/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@ if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()

# Threadpool size specifiers. Mutual exclusion is checked in default.cmake.
# Default to using performance cores if
# EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES isn't set.
set(_threadpool_size_flag)
if(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES)
set(_threadpool_size_flag "EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES")
else()
set(_threadpool_size_flag "EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES")
endif()

add_library(
extension_threadpool threadpool.cpp threadpool_guard.cpp thread_parallel.cpp
cpuinfo_utils.cpp
Expand All @@ -36,7 +46,9 @@ target_include_directories(
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include>
$<BUILD_INTERFACE:${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include>
)
target_compile_definitions(extension_threadpool PUBLIC ET_USE_THREADPOOL)
target_compile_definitions(
extension_threadpool PUBLIC ET_USE_THREADPOOL ${_threadpool_size_flag}
)
target_compile_options(extension_threadpool PUBLIC ${_common_compile_options})

# Install libraries
Expand Down
1 change: 1 addition & 0 deletions extension/threadpool/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def define_common_targets():
name = "threadpool_lib",
srcs = _THREADPOOL_SRCS,
deps = [
":cpuinfo_utils",
"//executorch/runtime/core:core",
"//executorch/runtime/core/portable_type/c10/c10:c10",
],
Expand Down
9 changes: 9 additions & 0 deletions extension/threadpool/test/threadpool_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include <executorch/extension/threadpool/threadpool.h>
#include <executorch/runtime/platform/runtime.h>

#include <mutex>
#include <numeric>
Expand Down Expand Up @@ -71,6 +72,8 @@ void run_lambda_with_size(
} // namespace

TEST(ThreadPoolTest, ParallelAdd) {
executorch::runtime::runtime_init();

std::vector<int32_t> a, b, c, c_ref;
size_t vector_size = 100;
size_t grain_size = 10;
Expand Down Expand Up @@ -111,6 +114,8 @@ TEST(ThreadPoolTest, ParallelAdd) {

// Test parallel reduction where we acquire lock within lambda
TEST(ThreadPoolTest, ParallelReduce) {
executorch::runtime::runtime_init();

std::vector<int32_t> a;
int32_t c = 0, c_ref = 0;
size_t vector_size = 100;
Expand Down Expand Up @@ -144,6 +149,8 @@ TEST(ThreadPoolTest, ParallelReduce) {
// Copied from
// caffe2/aten/src/ATen/test/test_thread_pool_guard.cpp
TEST(TestNoThreadPoolGuard, TestThreadPoolGuard) {
executorch::runtime::runtime_init();

auto threadpool_ptr = ::executorch::extension::threadpool::get_pthreadpool();

ASSERT_NE(threadpool_ptr, nullptr);
Expand Down Expand Up @@ -173,6 +180,8 @@ TEST(TestNoThreadPoolGuard, TestThreadPoolGuard) {
}

TEST(TestNoThreadPoolGuard, TestRunWithGuard) {
executorch::runtime::runtime_init();

const std::vector<int64_t> array = {1, 2, 3};

auto pool = ::executorch::extension::threadpool::get_threadpool();
Expand Down
33 changes: 32 additions & 1 deletion extension/threadpool/threadpool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/extension/threadpool/cpuinfo_utils.h>
#include <executorch/extension/threadpool/threadpool.h>

#include <algorithm>
Expand All @@ -14,9 +15,26 @@

#include <executorch/extension/threadpool/threadpool_guard.h>
#include <executorch/runtime/platform/assert.h>
#include <executorch/runtime/platform/runtime.h>

#include <cpuinfo.h>

// Threadpool sizing mode selection. The available modes are described in
// threadpool.h.
//
// At most one mode may be set; defining both is a configuration error.
#if defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES) && \
    defined(EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES)
// The message is quoted and kept on one line so that code formatters do not
// re-flow it into an unreadable diagnostic.
#error "Multiple threadpool size specifiers are set. At most one of EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES and EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES may be defined."
#endif

// If this translation unit is compiled with neither mode defined, fall back to
// EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES.
#if !defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES) && \
    !defined(EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES)
#define EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES 1
#endif

namespace executorch::extension::threadpool {

#if !(defined(WIN32))
Expand Down Expand Up @@ -96,12 +114,25 @@ void ThreadPool::run(
// get_threadpool is not thread safe due to leak_corrupted_threadpool
// Make this part threadsafe: TODO(kimishpatel)
ThreadPool* get_threadpool() {
executorch::runtime::runtime_init();

if (!cpuinfo_initialize()) {
ET_LOG(Error, "cpuinfo initialization failed");
return nullptr; // NOLINT(facebook-hte-NullableReturn)
}

int num_threads = cpuinfo_get_processors_count();
// Choose the number of threads according to the EXECUTORCH_THREADPOOL_
// options. See the description in threadpool.h.

#if defined(EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES)
// Use threads=cores.
static int num_threads = cpuinfo_get_processors_count();
#else
// Set threads equal to the number of performance cores.
static int num_threads =
::executorch::extension::cpuinfo::get_num_performant_cores();
#endif
Comment on lines +127 to +134
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default behavior then seems to be to use the number of performance cores? I thought you would want this the other way around: by default you would use all logical cores, and in the OSS CMake build we can make performance cores the default build option.

Issue is that for internal uses now you only have performant cores

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In extension/threadpool/targets.bzl, I changed it to define EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES when not in OSS, so that should cover this case. If there's a better way to ensure this, I'm definitely open to it. I could add an API to retrieve the threadpool size and add an internal test to verify the behavior, if you'd like.


/*
* For llvm-tsan, holding limit for the number of locks for a single thread
* is 63 (because of comparison < 64 instead of <=). pthreadpool's worst
Expand Down
16 changes: 16 additions & 0 deletions extension/threadpool/threadpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,22 @@

#include <pthreadpool.h>

/*
* Threadpool Options:
*
* Threadpool size has a sizable effect on performance. By default, the
* threadpool will be sized according to the number of performance cores. This
* behavior can be overridden with the following build-time options. Note that
* these options are mutually exclusive.
*
* - EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES (flag) - Sizes the threadpool
* equal to the number of performance cores on the system. This is the default
* behavior.
* - EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES (flag) - Sizes the threadpool
* equal to the number of logical cores on the system. This is the historical
* behavior.
*/

namespace executorch::extension::threadpool {

class ThreadPool final {
Expand Down
30 changes: 30 additions & 0 deletions tools/cmake/preset/default.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,36 @@ define_overridable_option(
${_default_executorch_build_cpuinfo}
)

# Threadpool size options. At most one can be specified. Both options default
# to OFF here so that the user can select one mode without needing to
# explicitly turn the other off; when neither is set, the fallback mode is
# chosen in extension/threadpool/CMakeLists.txt (and in threadpool.cpp).
define_overridable_option(
EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES
"Set the number of threads used for CPU parallel computation equal to the number of performant CPU cores."
BOOL
OFF
)
define_overridable_option(
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES
"Set the number of threads used for CPU parallel computation equal to the number of logical CPU cores."
BOOL
OFF
)

check_required_options_on(
IF_ON EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES REQUIRES
EXECUTORCH_BUILD_PTHREADPOOL EXECUTORCH_BUILD_CPUINFO
)
check_required_options_on(
IF_ON EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES REQUIRES
EXECUTORCH_BUILD_PTHREADPOOL EXECUTORCH_BUILD_CPUINFO
)

check_conflicting_options_on(
IF_ON EXECUTORCH_THREADPOOL_USE_PERFORMANCE_CORES CONFLICTS_WITH
EXECUTORCH_THREADPOOL_USE_ALL_LOGICAL_CORES
)

# TODO(jathu): move this to platform specific presets when created
set(_default_executorch_build_executor_runner ON)
if(APPLE AND "${SDK_NAME}" STREQUAL "iphoneos")
Expand Down
Loading