@@ -11,6 +11,7 @@

 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.elasticsearch.common.settings.ClusterSettings;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Setting.Property;
 import org.elasticsearch.common.settings.Settings;
@@ -43,6 +44,7 @@
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Consumer;
 import java.util.function.LongUnaryOperator;
 import java.util.function.ToLongFunction;

@@ -182,37 +184,54 @@ public Iterator<Setting<?>> settings() {
     private final int maxConcurrentMerges;
     private final int concurrentMergesFloorLimitForThrottling;
     private final int concurrentMergesCeilLimitForThrottling;
-    private final Scheduler.Cancellable diskSpaceMonitor;
+    private final AvailableDiskSpacePeriodicMonitor availableDiskSpacePeriodicMonitor;

     private final List<MergeEventListener> mergeEventListeners = new CopyOnWriteArrayList<>();

     public static @Nullable ThreadPoolMergeExecutorService maybeCreateThreadPoolMergeExecutorService(
         ThreadPool threadPool,
         Settings settings,
+        ClusterSettings clusterSettings,
         NodeEnvironment nodeEnvironment
     ) {
         if (ThreadPoolMergeScheduler.USE_THREAD_POOL_MERGE_SCHEDULER_SETTING.get(settings)) {
-            return new ThreadPoolMergeExecutorService(threadPool, settings, nodeEnvironment);
+            return new ThreadPoolMergeExecutorService(threadPool, settings, clusterSettings, nodeEnvironment);
         } else {
             return null;
         }
     }

-    private ThreadPoolMergeExecutorService(ThreadPool threadPool, Settings settings, NodeEnvironment nodeEnvironment) {
+    private ThreadPoolMergeExecutorService(
+        ThreadPool threadPool,
+        Settings settings,
+        ClusterSettings clusterSettings,
+        NodeEnvironment nodeEnvironment
+    ) {
         this.executorService = threadPool.executor(ThreadPool.Names.MERGE);
         this.maxConcurrentMerges = threadPool.info(ThreadPool.Names.MERGE).getMax();
         // the intent here is to throttle down whenever we submit a task and no other task is running
         this.concurrentMergesFloorLimitForThrottling = 2;
         this.concurrentMergesCeilLimitForThrottling = maxConcurrentMerges * 2;
         assert concurrentMergesFloorLimitForThrottling <= concurrentMergesCeilLimitForThrottling;
-        this.diskSpaceMonitor = threadPool.scheduleWithFixedDelay(
-            new DiskSpaceMonitor(
+        this.availableDiskSpacePeriodicMonitor = new AvailableDiskSpacePeriodicMonitor(
+            nodeEnvironment.dataPaths(),
+            threadPool,
             INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING.get(settings),
             INDICES_MERGE_DISK_HIGH_MAX_HEADROOM_SETTING.get(settings),
-            nodeEnvironment.dataPaths()
-            ),
-            INDICES_MERGE_DISK_CHECK_INTERVAL_SETTING.get(settings),
-            threadPool.generic()
+            INDICES_MERGE_DISK_CHECK_INTERVAL_SETTING.get(settings),
+            (availableDiskSpaceByteSize) -> queuedMergeTasks.updateAvailableBudget(availableDiskSpaceByteSize.getBytes())
+        );
+        clusterSettings.addSettingsUpdateConsumer(
+            INDICES_MERGE_DISK_HIGH_WATERMARK_SETTING,
+            this.availableDiskSpacePeriodicMonitor::setHighStageWatermark
+        );
+        clusterSettings.addSettingsUpdateConsumer(
+            INDICES_MERGE_DISK_HIGH_MAX_HEADROOM_SETTING,
+            this.availableDiskSpacePeriodicMonitor::setHighStageMaxHeadroom
+        );
+        clusterSettings.addSettingsUpdateConsumer(
+            INDICES_MERGE_DISK_CHECK_INTERVAL_SETTING,
+            this.availableDiskSpacePeriodicMonitor::setCheckInterval
         );
     }

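The three addSettingsUpdateConsumer registrations above are what make the high watermark, max headroom, and check interval dynamically updatable at runtime. As a rough sketch of how such an update reaches the monitor (hypothetical caller code, not part of this change; it assumes the clusterSettings instance wired in above, and the 30s value is made up):

    // Applying updated settings makes ClusterSettings diff them against the previous values and
    // invoke the matching registered consumer, here availableDiskSpacePeriodicMonitor::setCheckInterval,
    // which stores the new interval and calls reschedule().
    clusterSettings.applySettings(
        Settings.builder().put(INDICES_MERGE_DISK_CHECK_INTERVAL_SETTING.getKey(), "30s").build()
    );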
@@ -354,24 +373,69 @@ private void abortMergeTask(MergeTask mergeTask) {
         }
     }

-    class DiskSpaceMonitor implements Runnable {
-        private static final Logger LOGGER = LogManager.getLogger(ThreadPoolMergeExecutorService.DiskSpaceMonitor.class);
-        private final RelativeByteSizeValue highStageWatermark;
-        private final ByteSizeValue highStageMaxHeadroom;
+    static class AvailableDiskSpacePeriodicMonitor implements Closeable {
+        private static final Logger LOGGER = LogManager.getLogger(AvailableDiskSpacePeriodicMonitor.class);
         private final NodeEnvironment.DataPath[] dataPaths;
-
-        DiskSpaceMonitor(
+        private final ThreadPool threadPool;
+        private volatile RelativeByteSizeValue highStageWatermark;
+        private volatile ByteSizeValue highStageMaxHeadroom;
+        private volatile TimeValue checkInterval;
+        private final Consumer<ByteSizeValue> updateConsumer;
+        private volatile boolean closed;
+        private volatile Scheduler.Cancellable monitor;
+
+        AvailableDiskSpacePeriodicMonitor(
+            NodeEnvironment.DataPath[] dataPaths,
+            ThreadPool threadPool,
             RelativeByteSizeValue highStageWatermark,
             ByteSizeValue highStageMaxHeadroom,
-            NodeEnvironment.DataPath[] dataPaths
+            TimeValue checkInterval,
+            Consumer<ByteSizeValue> updateConsumer
         ) {
+            this.dataPaths = dataPaths;
+            this.threadPool = threadPool;
             this.highStageWatermark = highStageWatermark;
             this.highStageMaxHeadroom = highStageMaxHeadroom;
-            this.dataPaths = dataPaths;
+            this.checkInterval = checkInterval;
+            this.updateConsumer = updateConsumer;
+            this.closed = false;
+            reschedule();
+            // early monitor run in the constructor
+            run();
+        }
+
+        public void setCheckInterval(TimeValue checkInterval) {
+            this.checkInterval = checkInterval;
+            reschedule();
+        }
+
+        public void setHighStageWatermark(RelativeByteSizeValue highStageWatermark) {
+            this.highStageWatermark = highStageWatermark;
+        }
+
+        public void setHighStageMaxHeadroom(ByteSizeValue highStageMaxHeadroom) {
+            this.highStageMaxHeadroom = highStageMaxHeadroom;
+        }
+
+        private synchronized void reschedule() {
+            if (monitor != null) {
+                monitor.cancel();
+            }
+            if (closed == false && checkInterval.duration() > 0) {
+                monitor = threadPool.scheduleWithFixedDelay(this::run, checkInterval, threadPool.generic());
+            }
         }

         @Override
-        public void run() {
+        public void close() throws IOException {
+            closed = true;
+            reschedule();
+        }
+
+        private void run() {
+            if (closed) {
+                return;
+            }
             FsInfo.Path mostAvailablePath = null;
             IOException fsInfoException = null;
             for (NodeEnvironment.DataPath dataPath : dataPaths) {
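Two details of the monitor above are easy to miss: the constructor runs one check synchronously before the first scheduled interval, and reschedule() only arms a new periodic task while the monitor is open and the interval is positive, so a zero interval disables the periodic checks without closing the monitor. A minimal sketch of the resulting lifecycle (hypothetical caller code, not part of this change):

    // assuming `monitor` references an AvailableDiskSpacePeriodicMonitor built as in the constructor above
    monitor.setCheckInterval(TimeValue.ZERO);                  // cancels the periodic task; the monitor stays open
    monitor.setCheckInterval(TimeValue.timeValueSeconds(5));   // re-arms checks every 5 seconds on the generic pool
    monitor.close();                                           // cancels permanently; later run() invocations become no-ops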
@@ -399,17 +463,18 @@ public void run() {
             // subtract the configured free disk space threshold
             mostAvailableDiskSpaceBytes -= getFreeBytesThreshold(mostAvailablePath.getTotal(), highStageWatermark, highStageMaxHeadroom)
                 .getBytes();
+            // clamp available space to 0
             long maxMergeSizeLimit = Math.max(0L, mostAvailableDiskSpaceBytes);
-            queuedMergeTasks.updateAvailableBudget(maxMergeSizeLimit);
+            updateConsumer.accept(ByteSizeValue.ofBytes(maxMergeSizeLimit));
         }

         private static ByteSizeValue getFreeBytesThreshold(
-                ByteSizeValue total,
-                RelativeByteSizeValue watermark,
-                ByteSizeValue maxHeadroom
+            ByteSizeValue total,
+            RelativeByteSizeValue watermark,
+            ByteSizeValue maxHeadroom
         ) {
-            // If bytes are given, they can be readily returned as free bytes. If percentages are given, we need to calculate the free
-            // bytes.
+            // If bytes are given, they can be readily returned as free bytes.
+            // If percentages are given, we need to calculate the free bytes.
             if (watermark.isAbsolute()) {
                 return watermark.getAbsolute();
             }
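To make the budget computation above concrete: the value handed to updateConsumer is the free space on the most-available data path minus the free-bytes threshold, floored at zero. As a hedged worked example (made-up sizes, and it assumes the percentage branch of getFreeBytesThreshold caps the threshold at the max headroom, in the same way the disk allocation watermarks do): on a 1 TB data path with 150 GB free, a 95% high watermark and a 100 GB max headroom give a threshold of min(5% of 1 TB, 100 GB) = 50 GB, so the merge budget reported to the queue is 150 GB - 50 GB = 100 GB.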
@@ -517,7 +582,7 @@ public E element() {

     @Override
     public void close() throws IOException {
-        diskSpaceMonitor.cancel();
+        availableDiskSpacePeriodicMonitor.close();
     }

     private static long newTargetIORateBytesPerSec(