@@ -56,6 +56,7 @@ public class ThreadPoolMergeScheduler extends MergeScheduler implements Elastics
5656 // how many {@link MergeTask}s have kicked off (this is used to name them).
5757 private final AtomicLong mergeTaskCount = new AtomicLong ();
5858 private final AtomicLong mergeTaskDoneCount = new AtomicLong ();
59+ private final AtomicBoolean closed = new AtomicBoolean ();
5960
6061 public ThreadPoolMergeScheduler (ShardId shardId , IndexSettings indexSettings , ThreadPoolMergeQueue threadPoolMergeQueue ) {
6162 this .shardId = shardId ;
@@ -88,17 +89,28 @@ public void refreshConfig() {
8889 // if maxMergeCount changed, maybe we need to toggle merge task throttling
8990 checkMergeTaskThrottling ();
9091 // if maxThreadCount changed, maybe some backlogged merges are now allowed to run
91- enqueueBacklogged ();
92+ enqueueBackloggedTasks ();
9293 }
9394
9495 @ Override
9596 public void merge (MergeSource mergeSource , MergeTrigger trigger ) throws IOException {
97+ if (closed .get ()) {
98+ // avoid pulling from the merge source when closing
99+ return ;
100+ }
96101 MergePolicy .OneMerge merge = mergeSource .getNextMerge ();
97102 if (merge != null ) {
98103 submitNewMergeTask (mergeSource , merge , trigger );
99104 }
100105 }
101106
107+ @ Override
108+ public MergeScheduler clone () {
109+ // Lucene IW makes a clone internally but since we hold on to this instance
110+ // the clone will just be the identity.
111+ return this ;
112+ }
113+
102114 /**
103115 * A callback allowing for custom logic before an actual merge starts.
104116 */
@@ -109,17 +121,20 @@ protected void beforeMerge(OnGoingMerge merge) {}
109121 */
110122 protected void afterMerge (OnGoingMerge merge ) {}
111123
124+ /**
125+ * A callback that's invoked when indexing should throttle down indexing in order to let merging to catch up.
126+ */
112127 protected void enableIndexingThrottling (int numRunningMerges , int numQueuedMerges , int configuredMaxMergeCount ) {}
113128
129+ /**
130+ * A callback that's invoked when indexing should un-throttle because merging caught up.
131+ * This is invoked sometime after {@link #enableIndexingThrottling(int, int, int)} was invoked in the first place.
132+ */
114133 protected void disableIndexingThrottling (int numRunningMerges , int numQueuedMerges , int configuredMaxMergeCount ) {}
115134
116- @ Override
117- public MergeScheduler clone () {
118- // Lucene IW makes a clone internally but since we hold on to this instance
119- // the clone will just be the identity.
120- return this ;
121- }
122-
135+ /**
136+ * A callback for exceptions thrown while merging.
137+ */
123138 protected void handleMergeException (Throwable t ) {
124139 throw new MergePolicy .MergeException (t );
125140 }
@@ -162,13 +177,15 @@ private MergeTask newMergeTask(MergeSource mergeSource, MergePolicy.OneMerge mer
162177 // synchronized so that {@code #currentlyRunningMergeTasks} and {@code #backloggedMergeTasks} are modified atomically
163178 private synchronized boolean runNowOrBacklog (MergeTask mergeTask ) {
164179 assert mergeTask .isRunning () == false ;
165- if (currentlyRunningMergeTasks .size () >= config .getMaxThreadCount ()) {
166- backloggedMergeTasks .add (mergeTask );
167- return false ;
168- } else {
180+ assert mergeTask .isOnGoingMergeAborted () == false ;
181+ // if the merge scheduler is closed it will abort all merges before they start running
182+ if (closed .get () || currentlyRunningMergeTasks .size () < config .getMaxThreadCount ()) {
169183 boolean added = currentlyRunningMergeTasks .put (mergeTask .onGoingMerge .getMerge (), mergeTask ) == null ;
170184 assert added : "starting merge task [" + mergeTask + "] registered as already running" ;
171185 return true ;
186+ } else {
187+ backloggedMergeTasks .add (mergeTask );
188+ return false ;
172189 }
173190 }
174191
@@ -178,15 +195,20 @@ private void mergeDone(MergeTask mergeTask) {
178195 boolean removed = currentlyRunningMergeTasks .remove (mergeTask .onGoingMerge .getMerge ()) != null ;
179196 assert removed : "completed merge task [" + mergeTask + "] not registered as running" ;
180197 // when one merge is done, maybe a backlogged one can now execute
181- enqueueBacklogged ();
198+ enqueueBackloggedTasks ();
199+ // when closing, we wait for all running merges to finish
200+ if (currentlyRunningMergeTasks .isEmpty ()) {
201+ notifyAll ();
202+ }
182203 }
183204 mergeTaskDoneCount .incrementAndGet ();
184205 checkMergeTaskThrottling ();
185206 }
186207
187- private synchronized void enqueueBacklogged () {
208+ private synchronized void enqueueBackloggedTasks () {
188209 int maxBackloggedTasksToEnqueue = config .getMaxThreadCount () - currentlyRunningMergeTasks .size ();
189- while (maxBackloggedTasksToEnqueue -- > 0 ) {
210+ // enqueue all backlogged tasks when closing, let the queue deal with them
211+ while (closed .get () || maxBackloggedTasksToEnqueue -- > 0 ) {
190212 MergeTask backloggedMergeTask = backloggedMergeTasks .poll ();
191213 if (backloggedMergeTask == null ) {
192214 break ;
@@ -271,6 +293,11 @@ public boolean isRunning() {
271293
272294 @ Override
273295 public void run () {
296+ assert isOnGoingMergeAborted () == false ;
297+ if (ThreadPoolMergeScheduler .this .closed .get ()) {
298+ abortOnGoingMerge ();
299+ return ;
300+ }
274301 if (mergeStartTimeNS .compareAndSet (0L , System .nanoTime ()) == false ) {
275302 throw new IllegalStateException ("Cannot run the same merge task multiple times" );
276303 }
@@ -302,6 +329,7 @@ public void run() {
302329 if (t instanceof MergePolicy .MergeAbortedException ) {
303330 // OK to ignore. This is what Lucene's ConcurrentMergeScheduler does
304331 } else {
332+ // sometimes this might double-abort a merge, but that's OK
305333 abortOnGoingMerge ();
306334 handleMergeException (t );
307335 }
@@ -317,32 +345,27 @@ public void run() {
317345 mergeTracking .mergeFinished (onGoingMerge .getMerge (), onGoingMerge , tookMS );
318346 } finally {
319347 mergeDone (this );
320- // kick-off next merge, if any
321- MergePolicy .OneMerge nextMerge = null ;
322- try {
323- nextMerge = mergeSource .getNextMerge ();
324- } catch (IllegalStateException e ) {
325- if (verbose ()) {
326- message ("merge task poll failed, likely that index writer is failed" );
348+ if (ThreadPoolMergeScheduler .this .closed .get () == false ) {
349+ // kick-off next merge, if any
350+ MergePolicy .OneMerge nextMerge = null ;
351+ try {
352+ nextMerge = mergeSource .getNextMerge ();
353+ } catch (IllegalStateException e ) {
354+ if (verbose ()) {
355+ message ("merge task poll failed, likely that index writer is failed" );
356+ }
357+ // ignore exception, we expect the IW failure to be logged elsewhere
358+ }
359+ if (nextMerge != null ) {
360+ submitNewMergeTask (mergeSource , nextMerge , MergeTrigger .MERGE_FINISHED );
327361 }
328- // ignore exception, we expect the IW failure to be logged elsewhere
329- }
330- if (nextMerge != null ) {
331- submitNewMergeTask (mergeSource , nextMerge , MergeTrigger .MERGE_FINISHED );
332362 }
333363 }
334364 }
335365 }
336366 }
337367
338- public void onRejection (Exception e ) {
339- if (verbose ()) {
340- message (String .format (Locale .ROOT , "merge task [%s] rejected by thread pool, aborting" , onGoingMerge .getId ()));
341- }
342- abortOnGoingMerge ();
343- }
344-
345- private void abortOnGoingMerge () {
368+ void abortOnGoingMerge () {
346369 // This would interrupt an IndexWriter if it were actually performing the merge. We just set this here because it seems
347370 // appropriate as we are not going to move forward with the merge.
348371 onGoingMerge .getMerge ().setAborted ();
@@ -351,6 +374,10 @@ private void abortOnGoingMerge() {
351374 mergeSource .onMergeFinished (onGoingMerge .getMerge ());
352375 }
353376
377+ boolean isOnGoingMergeAborted () {
378+ return onGoingMerge .getMerge ().isAborted ();
379+ }
380+
354381 @ Override
355382 public String toString () {
356383 return name ;
@@ -375,6 +402,35 @@ protected void message(String message) {
375402 super .message (message );
376403 }
377404
405+ @ Override
406+ public void close () throws IOException {
407+ super .close ();
408+ closed .set (true );
409+ boolean interrupted = false ;
410+ try {
411+ synchronized (this ) {
412+ // enqueue all backlogged merge tasks, the merge queue assumes that backlogged tasks are always re-enqueued
413+ enqueueBackloggedTasks ();
414+ // supercharge running merges
415+ currentlyRunningMergeTasks .values ().forEach (runningTask -> runningTask .setIORateLimit (Double .POSITIVE_INFINITY ));
416+ // wait until all running merges are done
417+ while (currentlyRunningMergeTasks .isEmpty () == false ) {
418+ try {
419+ // wait with a timeout, just to cover for something that failed to notify
420+ wait (1000 );
421+ } catch (InterruptedException e ) {
422+ // ignore interruption, we will retry until all currently running merge tasks are done
423+ interrupted = true ;
424+ }
425+ }
426+ }
427+ } finally {
428+ if (interrupted ) {
429+ Thread .currentThread ().interrupt ();
430+ }
431+ }
432+ }
433+
378434 private static double nsToSec (long ns ) {
379435 return ns / (double ) TimeUnit .SECONDS .toNanos (1 );
380436 }
0 commit comments