File tree Expand file tree Collapse file tree 3 files changed +47
-2
lines changed
Expand file tree Collapse file tree 3 files changed +47
-2
lines changed Original file line number Diff line number Diff line change @@ -34,7 +34,10 @@ def define_common_targets():
3434 ],
3535 exported_preprocessor_flags = [
3636 "-DET_USE_THREADPOOL" ,
37- ],
37+ ] + (
38+ # Default to perf heuristic (0) in OSS, all cores (-1) otherwise.
39+ ["-DEXECUTORCH_THREADPOOL_SIZE=0" ] if runtime .is_oss else ["-DEXECUTORCH_THREADPOOL_SIZE=-1" ]
40+ ),
3841 visibility = [
3942 "//executorch/..." ,
4043 "//executorch/backends/..." ,
Original file line number Diff line number Diff line change @@ -101,7 +101,23 @@ ThreadPool* get_threadpool() {
101101 return nullptr ; // NOLINT(facebook-hte-NullableReturn)
102102 }
103103
104- int num_threads = cpuinfo_get_processors_count ();
104+ int num_cores = cpuinfo_get_processors_count ();
105+
106+ // Choose the number of threads according to the EXECUTORCH_THREADPOOL_SIZE
107+ // value. See the description in threadpool.h.
108+
109+ #if defined(EXECUTORCH_THREADPOOL_SIZE) && ((EXECUTORCH_THREADPOOL_SIZE) > 0)
110+ // Use an explicit threadpool size.
111+ int num_threads = EXECUTORCH_THREADPOOL_SIZE;
112+ #elif defined(EXECUTORCH_THREADPOOL_SIZE) && \
113+ ((EXECUTORCH_THREADPOOL_SIZE) == -1 )
114+ // Use threads=cores.
115+ int num_threads = num_cores;
116+ #else
117+ // Use a performance heuristic.
118+ int num_threads = num_cores / 2 ;
119+ #endif
120+
105121 /*
106122 * For llvm-tsan, holding limit for the number of locks for a single thread
107123 * is 63 (because of comparison < 64 instead of <=). pthreadpool's worst
Original file line number Diff line number Diff line change 1414
1515#include < pthreadpool.h>
1616
17+ /*
18+ * Threadpool Options:
19+ *
20+ * Threadpool size has a sizable effect on performance. The following
21+ * options are exposed to control the threadpool size.
22+ *
23+ * EXECUTORCH_THREADPOOL_SIZE: int - Set the size of the threadpool,
24+ * in number of threads.
25+ *
26+ * Special Values:
27+ * - 0: Use a performance heuristic to determine the default size,
28+ * based on the active hardware. This is the default mode
29+ * for CMake.
30+ * - -1: Set the thread count equal to the number of cores on the
31+ * active hardware.
32+ *
33+ * Any positive value will be interpreted as a thread count.
34+ * For example, setting EXECUTORCH_THREADPOOL_SIZE=4 will default
35+ * the threadpool to use 4 threads.
36+ */
37+
38+ #ifndef EXECUTORCH_THREADPOOL_SIZE
39+ // Default to using a runtime heuristic.
40+ #define EXECUTORCH_THREADPOOL_SIZE 0
41+ #endif
42+
1743namespace executorch ::extension::threadpool {
1844
1945class ThreadPool final {
You can’t perform that action at this time.
0 commit comments