Skip to content

Commit fd7866a

Browse files
authored
Expand some threadpool docs (#123244)
1 parent 10a0b9b commit fd7866a

File tree

1 file changed

+43
-8
lines changed

1 file changed

+43
-8
lines changed

server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java

Lines changed: 43 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionHandler;
2626
import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor;
2727
import org.elasticsearch.common.util.concurrent.ThreadContext;
28+
import org.elasticsearch.common.util.concurrent.ThrottledTaskRunner;
2829
import org.elasticsearch.core.Nullable;
2930
import org.elasticsearch.core.TimeValue;
3031
import org.elasticsearch.node.Node;
@@ -70,32 +71,66 @@ public class ThreadPool implements ReportingService<ThreadPoolInfo>, Scheduler,
7071
private static final Logger logger = LogManager.getLogger(ThreadPool.class);
7172

7273
/**
73-
* List of names that identify Java thread pools that are created in {@link ThreadPool#ThreadPool}.
74+
* List of names that identify Java thread pools that are created in {@link ThreadPool#ThreadPool}. The pools themselves are constructed
75+
* and configured using {@link DefaultBuiltInExecutorBuilders}.
7476
*/
7577
public static class Names {
7678
/**
77-
* All the tasks that do not relate to the purpose of one of the other thread pools should use this thread pool. Try to pick one of
78-
* the other more specific thread pools where possible.
79+
* A thread pool with a very high (but finite) maximum size. Use only after careful consideration.
80+
* <p>
81+
* This pool may be used for one-off CPU-bound activities, but its maximum size is so high that it doesn't really work well to do a
82+
* lot of CPU-bound work in parallel here: submitting more CPU-bound tasks than we have CPUs to run them will burn a lot of CPU just
83+
* context-switching in order to try to make fair progress on all the threads at once. Better to submit fewer tasks and wait for
84+
* them to complete before submitting more, for instance using {@link ThrottledTaskRunner} and friends.
85+
* <p>
86+
* Likewise you can do IO on this pool, but using it for lots of concurrent IO is likely harmful in clusters with poor concurrent IO
87+
* performance (especially if using spinning disks).
88+
* <p>
89+
* Blocking on a future on this pool risks deadlock if there's a chance that the completion of the future depends on work being done
90+
* on this pool. Unfortunately such a dependency is pretty likely in most cases because of how often this pool is used; it's really rare to hit
91+
* such a deadlock because of the high limit on the pool size, but when it happens it is extremely harmful to the node. For more
92+
* information, see e.g. {@code UnsafePlainActionFuture}.
93+
* <p>
94+
* This pool is for instance used for recovery-related work, which is a mix of CPU-bound and IO-bound work and does not block on
95+
* futures. The recovery subsystem bounds its own concurrency, and therefore the amount of recovery work done on the {@code
96+
* #GENERIC} pool, via {@code cluster.routing.allocation.node_concurrent_recoveries} and related settings. This pool is a good
97+
* choice for recovery work because the threads used by recovery will be used by other {@code #GENERIC} work too rather than mostly
98+
* sitting idle until cleaned up. Idle threads are surprisingly costly sometimes.
99+
* <p>
100+
* This pool does not reject any task. Tasks you submit to this executor after the pool starts to shut down may simply never run.
79101
*/
80102
public static final String GENERIC = "generic";
103+
81104
/**
82-
* Important management tasks that keep the cluster from falling apart.
83-
* This thread pool ensures cluster coordination tasks do not get blocked by less critical tasks and can continue to make progress.
84-
* This thread pool also defaults to a single thread, reducing contention on the Coordinator mutex.
105+
* A thread pool solely for the use of the cluster coordination subsystem that relates to cluster state updates, master elections,
106+
* cluster membership and so on.
107+
* <p>
108+
* This pool defaults to a single thread to avoid contention on {@code Coordinator#mutex}.
85109
*/
86110
public static final String CLUSTER_COORDINATION = "cluster_coordination";
111+
87112
public static final String GET = "get";
88113
public static final String ANALYZE = "analyze";
89114
public static final String WRITE = "write";
90115
public static final String SEARCH = "search";
91116
public static final String SEARCH_COORDINATION = "search_coordination";
92117
public static final String AUTO_COMPLETE = "auto_complete";
93118
public static final String SEARCH_THROTTLED = "search_throttled";
119+
94120
/**
95-
* Cluster management tasks. Tasks that manage data, and tasks that report on cluster health via statistics etc.
96-
* Not a latency sensitive thread pool: some tasks may be long-running; and the thread pool size is limited / relatively small.
121+
* A thread pool for running tasks related to cluster management, including collecting and exposing stats in APIs and certain other
122+
* internal tasks.
123+
* <p>
124+
* This pool is deliberately small in order to throttle the rate at which such tasks are executed and avoid diverting resources away
125+
* from production-critical work such as indexing and search. You may run long-running (CPU-bound or IO-bound) tasks on this pool,
126+
* but if the work relates to a REST API call then it must be cancellable in order to prevent an overexcited client from blocking or
127+
* delaying other management work.
128+
* <p>
129+
* Note that a cluster with overloaded {@code MANAGEMENT} pools will typically struggle to respond to stats APIs and may be hard to
130+
* troubleshoot.
97131
*/
98132
public static final String MANAGEMENT = "management";
133+
99134
public static final String FLUSH = "flush";
100135
public static final String REFRESH = "refresh";
101136
public static final String WARMER = "warmer";

0 commit comments

Comments
 (0)