-
Notifications
You must be signed in to change notification settings - Fork 25.6k
WIP Threadpool merge scheduler sort all merges #120733
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bf557d2
a3f87df
f5a1a8d
f0b72fe
9b03950
26e4043
aba69d0
52796b5
c0667bf
2da753f
2c8dc7f
81cc0f1
f58120f
5ca992d
3c203cb
6c21654
68079d9
7b68ba9
9e467a1
a8f5297
928fd32
3f5b4a8
0e714a1
0297cce
2b79809
57c2a5c
60a71b8
68db209
4099ac5
5554bc2
17b682f
f3506da
6a3911a
00070cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,156 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the "Elastic License | ||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||
| * Public License v 1"; you may not use this file except in compliance with, at | ||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||
| */ | ||
|
|
||
| package org.elasticsearch.index.engine; | ||
|
|
||
| import org.elasticsearch.index.engine.ThreadPoolMergeScheduler.MergeTask; | ||
| import org.elasticsearch.threadpool.ThreadPool; | ||
|
|
||
| import java.util.Comparator; | ||
| import java.util.SortedSet; | ||
| import java.util.TreeSet; | ||
| import java.util.concurrent.ExecutorService; | ||
| import java.util.function.Consumer; | ||
|
|
||
| public class ThreadPoolMergeExecutor { | ||
| /** | ||
| * Floor for IO write rate limit (we will never go any lower than this) | ||
| */ | ||
| private static final double MIN_MERGE_MB_PER_SEC = 5.0; | ||
| /** | ||
| * Ceiling for IO write rate limit (we will never go any higher than this) | ||
| */ | ||
| private static final double MAX_MERGE_MB_PER_SEC = 10240.0; | ||
| /** | ||
| * Initial value for IO write rate limit when doAutoIOThrottle is true | ||
| */ | ||
| private static final double START_MB_PER_SEC = 20.0; | ||
| /** | ||
| * Current IO write throttle rate, for all merge, across all merge schedulers (shards) on the node | ||
| */ | ||
| private double targetMBPerSec = START_MB_PER_SEC; | ||
| private final SortedSet<ThreadPoolMergeScheduler> registeredMergeSchedulers = new TreeSet<>(new Comparator<ThreadPoolMergeScheduler>() { | ||
| @Override | ||
| public int compare(ThreadPoolMergeScheduler tpms1, ThreadPoolMergeScheduler tpms2) { | ||
| MergeTask mergeTask1 = tpms1.peekMergeTaskToExecute(); | ||
| MergeTask mergeTask2 = tpms2.peekMergeTaskToExecute(); | ||
| if (mergeTask1 == null && mergeTask2 == null) { | ||
| // arbitrary order between schedulers that cannot run any merge right now | ||
| return System.identityHashCode(tpms1) - System.identityHashCode(tpms2); | ||
| } else if (mergeTask1 == null) { | ||
| // "merge task 2" can run because "merge scheduler 1" cannot run any merges | ||
| return 1; | ||
| } else if (mergeTask2 == null) { | ||
| // "merge task 1" can run because "merge scheduler 2" cannot run any merges | ||
| return -1; | ||
| } else { | ||
| // run smaller merge task first | ||
| return mergeTask1.compareTo(mergeTask2); | ||
| } | ||
| } | ||
| }); | ||
| private final ExecutorService executorService; | ||
| private final int maxConcurrentMerges; | ||
| private int currentlyExecutingMergesCount; | ||
| private int currentlyActiveIOThrottledMergesCount; | ||
|
|
||
| public ThreadPoolMergeExecutor(ThreadPool threadPool) { | ||
| this.executorService = threadPool.executor(ThreadPool.Names.MERGE); | ||
| this.maxConcurrentMerges = threadPool.info(ThreadPool.Names.MERGE).getMax(); | ||
| } | ||
|
|
||
| public double getTargetMBPerSec() { | ||
| return targetMBPerSec; | ||
| } | ||
|
|
||
| public synchronized void registerMergeScheduler(ThreadPoolMergeScheduler threadPoolMergeScheduler) { | ||
| if (registeredMergeSchedulers.add(threadPoolMergeScheduler) == false) { | ||
| throw new IllegalStateException("cannot register the same scheduler multiple times"); | ||
| } | ||
| } | ||
|
|
||
| public synchronized void unregisterMergeScheduler(ThreadPoolMergeScheduler threadPoolMergeScheduler) { | ||
| if (registeredMergeSchedulers.remove(threadPoolMergeScheduler) == false) { | ||
| throw new IllegalStateException("cannot unregister if the scheduler has not been registered"); | ||
| } | ||
| } | ||
|
|
||
| public synchronized void updateMergeScheduler( | ||
| ThreadPoolMergeScheduler threadPoolMergeScheduler, | ||
| Consumer<ThreadPoolMergeScheduler> updater | ||
| ) { | ||
| boolean removed = registeredMergeSchedulers.remove(threadPoolMergeScheduler); | ||
| if (false == removed) { | ||
| throw new IllegalStateException("Cannot update a merge scheduler that is not registered"); | ||
| } | ||
| currentlyExecutingMergesCount -= threadPoolMergeScheduler.getCurrentlyRunningMergeTasks().size(); | ||
| currentlyActiveIOThrottledMergesCount -= getIOThrottledMergeTasksCount(threadPoolMergeScheduler); | ||
| updater.accept(threadPoolMergeScheduler); | ||
| boolean added = registeredMergeSchedulers.add(threadPoolMergeScheduler); | ||
| if (false == added) { | ||
| throw new IllegalStateException("Found duplicate registered merge scheduler"); | ||
| } | ||
| currentlyExecutingMergesCount += threadPoolMergeScheduler.getCurrentlyRunningMergeTasks().size(); | ||
| currentlyActiveIOThrottledMergesCount += getIOThrottledMergeTasksCount(threadPoolMergeScheduler); | ||
| double newTargetMBPerSec = maybeUpdateTargetMBPerSec(); | ||
| if (newTargetMBPerSec != targetMBPerSec) { | ||
| targetMBPerSec = newTargetMBPerSec; | ||
| threadPoolMergeScheduler.setIORateLimitForAllMergeTasks(newTargetMBPerSec); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this not set it for all merge schedulers or running tasks? I wonder if we should keep a list of the running tasks in this class so we can easily set it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's right it should be set for all merge schedulers, and, as you say, it would better be set for the running tasks only (and set it when they start). |
||
| } | ||
| maybeExecuteNextMerges(); | ||
| } | ||
|
Comment on lines
+84
to
+107
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An to a threadpool has the potential to reorder it in the sorted set. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It also has the potential to change the stats related to IO throttling, because that is done "globally" i.e. across all shards on the node. |
||
|
|
||
| public synchronized void maybeExecuteNextMerges() { | ||
| while (true) { | ||
| if (currentlyExecutingMergesCount >= maxConcurrentMerges) { | ||
| // all merge threads are busy | ||
| return; | ||
| } | ||
| if (registeredMergeSchedulers.first().peekMergeTaskToExecute() == null) { | ||
| // no merges available to run | ||
| return; | ||
| } | ||
| ThreadPoolMergeScheduler threadPoolMergeScheduler = registeredMergeSchedulers.removeFirst(); | ||
| currentlyExecutingMergesCount -= threadPoolMergeScheduler.getCurrentlyRunningMergeTasks().size(); | ||
| MergeTask mergeTask = threadPoolMergeScheduler.executeNextMergeTask(); | ||
| assert mergeTask != null; | ||
| executorService.execute(mergeTask); | ||
| registeredMergeSchedulers.add(threadPoolMergeScheduler); | ||
| currentlyExecutingMergesCount += threadPoolMergeScheduler.getCurrentlyRunningMergeTasks().size(); | ||
| } | ||
| } | ||
|
|
||
| private int getIOThrottledMergeTasksCount(ThreadPoolMergeScheduler mergeScheduler) { | ||
| if (mergeScheduler.shouldIOThrottleMergeTasks() == false) { | ||
| return 0; | ||
| } else { | ||
| int count = 0; | ||
| for (MergeTask runningMergeTask : mergeScheduler.getCurrentlyRunningMergeTasks()) { | ||
| if (runningMergeTask.supportsIOThrottling()) { | ||
| count++; | ||
| } | ||
| } | ||
| for (MergeTask queuedMergeTask : mergeScheduler.getQueuedMergeTasks()) { | ||
| if (queuedMergeTask.supportsIOThrottling()) { | ||
| count++; | ||
| } | ||
| } | ||
| return count; | ||
| } | ||
| } | ||
|
|
||
| private double maybeUpdateTargetMBPerSec() { | ||
| if (currentlyActiveIOThrottledMergesCount < maxConcurrentMerges * 2 && targetMBPerSec > MIN_MERGE_MB_PER_SEC) { | ||
| return Math.max(MIN_MERGE_MB_PER_SEC, targetMBPerSec / 1.1); | ||
| } else if (currentlyActiveIOThrottledMergesCount > maxConcurrentMerges * 4 && targetMBPerSec < MAX_MERGE_MB_PER_SEC) { | ||
| return Math.min(MAX_MERGE_MB_PER_SEC, targetMBPerSec * 1.1); | ||
| } | ||
| return targetMBPerSec; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have some perhaps unqualified anxiety around this central mutex across the node. Probably not a real issue but noting it anyway.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, this is an issue, but I don't see a better way in the current design.