|
25 | 25 | import org.elasticsearch.common.util.concurrent.EsRejectedExecutionHandler; |
26 | 26 | import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor; |
27 | 27 | import org.elasticsearch.common.util.concurrent.ThreadContext; |
| 28 | +import org.elasticsearch.common.util.concurrent.ThrottledTaskRunner; |
28 | 29 | import org.elasticsearch.core.Nullable; |
29 | 30 | import org.elasticsearch.core.TimeValue; |
30 | 31 | import org.elasticsearch.node.Node; |
@@ -70,32 +71,66 @@ public class ThreadPool implements ReportingService<ThreadPoolInfo>, Scheduler, |
70 | 71 | private static final Logger logger = LogManager.getLogger(ThreadPool.class); |
71 | 72 |
|
72 | 73 | /** |
73 | | - * List of names that identify Java thread pools that are created in {@link ThreadPool#ThreadPool}. |
| 74 | + * List of names that identify Java thread pools that are created in {@link ThreadPool#ThreadPool}. The pools themselves are constructed |
| 75 | + * and configured using {@link DefaultBuiltInExecutorBuilders}. |
74 | 76 | */ |
75 | 77 | public static class Names { |
76 | 78 | /** |
77 | | - * All the tasks that do not relate to the purpose of one of the other thread pools should use this thread pool. Try to pick one of |
78 | | - * the other more specific thread pools where possible. |
| 79 | + * A thread pool with a very high (but finite) maximum size. Use only after careful consideration. |
| 80 | + * <p> |
| 81 | + * This pool may be used for one-off CPU-bound activities, but its maximum size is so high that it doesn't really work well to do a |
| 82 | + * lot of CPU-bound work in parallel here: submitting more CPU-bound tasks than we have CPUs to run them will burn a lot of CPU just |
| 83 | + * context-switching in order to try and make fair progress on all the threads at once. Better to submit fewer tasks and wait for |
| 84 | + * them to complete before submitting more, for instance using {@link ThrottledTaskRunner} and friends. |
| 85 | + * <p> |
| 86 | + * Likewise you can do IO on this pool, but using it for lots of concurrent IO is likely harmful in clusters with poor concurrent IO |
| 87 | + * performance (especially if using spinning disks). |
| 88 | + * <p> |
| 89 | + * Blocking on a future on this pool risks deadlock if there's a chance that the completion of the future depends on work being done |
| 90 | + * on this pool. Unfortunately that's pretty likely in most cases because of how often this pool is used; it's really rare to hit |
| 91 | + * such a deadlock because of the high limit on the pool size, but when it happens it is extremely harmful to the node. For more |
| 92 | + * information, see e.g. {@code UnsafePlainActionFuture}. |
| 93 | + * <p> |
| 94 | + * This pool is for instance used for recovery-related work, which is a mix of CPU-bound and IO-bound work and does not block on |
| 95 | + * futures. The recovery subsystem bounds its own concurrency, and therefore the amount of recovery work done on the {@code |
| 96 | + * GENERIC} pool, via {@code cluster.routing.allocation.node_concurrent_recoveries} and related settings. This pool is a good |
| 97 | + * choice for recovery work because the threads used by recovery will be used by other {@code GENERIC} work too rather than mostly |
| 98 | + * sitting idle until cleaned up. Idle threads are surprisingly costly sometimes. |
| 99 | + * <p> |
| 100 | + * This pool does not reject any task. Tasks you submit to this executor after the pool starts to shut down may simply never run. |
79 | 101 | */ |
80 | 102 | public static final String GENERIC = "generic"; |
| 103 | + |
81 | 104 | /** |
82 | | - * Important management tasks that keep the cluster from falling apart. |
83 | | - * This thread pool ensures cluster coordination tasks do not get blocked by less critical tasks and can continue to make progress. |
84 | | - * This thread pool also defaults to a single thread, reducing contention on the Coordinator mutex. |
| 105 | + * A thread pool solely for the use of the cluster coordination subsystem that relates to cluster state updates, master elections, |
| 106 | + * cluster membership and so on. |
| 107 | + * <p> |
| 108 | + * This pool defaults to a single thread to avoid contention on {@code Coordinator#mutex}. |
85 | 109 | */ |
86 | 110 | public static final String CLUSTER_COORDINATION = "cluster_coordination"; |
| 111 | + |
87 | 112 | public static final String GET = "get"; |
88 | 113 | public static final String ANALYZE = "analyze"; |
89 | 114 | public static final String WRITE = "write"; |
90 | 115 | public static final String SEARCH = "search"; |
91 | 116 | public static final String SEARCH_COORDINATION = "search_coordination"; |
92 | 117 | public static final String AUTO_COMPLETE = "auto_complete"; |
93 | 118 | public static final String SEARCH_THROTTLED = "search_throttled"; |
| 119 | + |
94 | 120 | /** |
95 | | - * Cluster management tasks. Tasks that manage data, and tasks that report on cluster health via statistics etc. |
96 | | - * Not a latency sensitive thread pool: some tasks may time be long-running; and the thread pool size is limited / relatively small. |
| 121 | + * A thread pool for running tasks related to cluster management, including collecting and exposing stats in APIs and certain other |
| 122 | + * internal tasks. |
| 123 | + * <p> |
| 124 | + * This pool is deliberately small in order to throttle the rate at which such tasks are executed and avoid diverting resources away |
| 125 | + * from production-critical work such as indexing and search. You may run long-running (CPU-bound or IO-bound) tasks on this pool, |
| 126 | + * but if the work relates to a REST API call then it must be cancellable in order to prevent an overexcited client from blocking or |
| 127 | + * delaying other management work. |
| 128 | + * <p> |
| 129 | + * Note that a cluster with overloaded {@code MANAGEMENT} pools will typically struggle to respond to stats APIs and may be hard to |
| 130 | + * troubleshoot. |
97 | 131 | */ |
98 | 132 | public static final String MANAGEMENT = "management"; |
| 133 | + |
99 | 134 | public static final String FLUSH = "flush"; |
100 | 135 | public static final String REFRESH = "refresh"; |
101 | 136 | public static final String WARMER = "warmer"; |
|
0 commit comments