Skip to content

Commit c7e7dbe

Browse files
authored
Abort pending deletion on IndicesService stop (#123569)
When IndicesService is closed, pending deletions may still be in progress for indices that were removed before IndicesService was closed. If a deletion gets stuck for some reason, it can stall the node shutdown. This PR aborts pending deletions more promptly by not retrying after IndicesService is stopped. Resolves: #121717 Resolves: #121716 Resolves: #122119
1 parent 2c0fb18 commit c7e7dbe

File tree

3 files changed

+16
-9
lines changed

3 files changed

+16
-9
lines changed

docs/changelog/123569.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 123569
2+
summary: Abort pending deletion on `IndicesService` close
3+
area: Store
4+
type: enhancement
5+
issues: []

muted-tests.yml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,6 @@ tests:
254254
- class: org.elasticsearch.test.rest.ClientYamlTestSuiteIT
255255
method: test {yaml=snapshot.delete/10_basic/Delete a snapshot asynchronously}
256256
issue: https://github.com/elastic/elasticsearch/issues/122102
257-
- class: org.elasticsearch.datastreams.TSDBPassthroughIndexingIT
258-
issue: https://github.com/elastic/elasticsearch/issues/121716
259257
- class: org.elasticsearch.smoketest.SmokeTestMonitoringWithSecurityIT
260258
method: testHTTPExporterWithSSL
261259
issue: https://github.com/elastic/elasticsearch/issues/122220
@@ -286,15 +284,9 @@ tests:
286284
- class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT
287285
method: test {yaml=reference/snapshot-restore/apis/get-snapshot-api/line_408}
288286
issue: https://github.com/elastic/elasticsearch/issues/122681
289-
- class: org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageIT
290-
method: testScaleWhileShrinking
291-
issue: https://github.com/elastic/elasticsearch/issues/122119
292287
- class: org.elasticsearch.search.basic.SearchWithRandomDisconnectsIT
293288
method: testSearchWithRandomDisconnects
294289
issue: https://github.com/elastic/elasticsearch/issues/122707
295-
- class: org.elasticsearch.snapshots.DedicatedClusterSnapshotRestoreIT
296-
method: testRestoreShrinkIndex
297-
issue: https://github.com/elastic/elasticsearch/issues/121717
298290
- class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT
299291
method: test {yaml=reference/cat/allocation/cat-allocation-example}
300292
issue: https://github.com/elastic/elasticsearch/issues/121976

server/src/main/java/org/elasticsearch/indices/IndicesService.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ public class IndicesService extends AbstractLifecycleComponent
255255
private final Map<String, IndexStorePlugin.RecoveryStateFactory> recoveryStateFactories;
256256
private final IndexStorePlugin.IndexFoldersDeletionListener indexFoldersDeletionListeners;
257257
final AbstractRefCounted indicesRefCount; // pkg-private for testing
258+
private final CountDownLatch stopLatch = new CountDownLatch(1);
258259
private final CountDownLatch closeLatch = new CountDownLatch(1);
259260
private volatile boolean idFieldDataEnabled;
260261
private volatile boolean allowExpensiveQueries;
@@ -403,6 +404,7 @@ public ClusterService clusterService() {
403404

404405
@Override
405406
protected void doStop() {
407+
stopLatch.countDown();
406408
clusterService.removeApplier(timestampFieldMapperService);
407409
timestampFieldMapperService.doStop();
408410

@@ -1440,7 +1442,15 @@ public void processPendingDeletes(Index index, IndexSettings indexSettings, Time
14401442
}
14411443
if (remove.isEmpty() == false) {
14421444
logger.warn("{} still pending deletes present for shards {} - retrying", index, remove.toString());
1443-
Thread.sleep(sleepTime);
1445+
if (stopLatch.await(sleepTime, TimeUnit.MILLISECONDS)) {
1446+
logger.info(
1447+
"Indices service stopped. {} aborting pending deletes after [{}] for shards {}",
1448+
index,
1449+
TimeValue.timeValueNanos(System.nanoTime() - startTimeNS),
1450+
remove.toString()
1451+
);
1452+
break;
1453+
}
14441454
sleepTime = Math.min(maxSleepTimeMs, sleepTime * 2); // increase the sleep time gradually
14451455
logger.debug("{} schedule pending delete retry after {} ms", index, sleepTime);
14461456
}

0 commit comments

Comments
 (0)