Skip to content

Commit 83fe2ed

Browse files
nicktindalltlrx
andauthored
Optimise shared-blob-cache evictions (#126581)
Closes: ES-10744 Co-authored-by: Tanguy Leroux <[email protected]>
1 parent 8929a64 commit 83fe2ed

File tree

26 files changed

+570
-132
lines changed

26 files changed

+570
-132
lines changed

docs/changelog/126581.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pr: 126581
2+
summary: "Optimize shared blob cache evictions on shard removal
3+
Shared blob cache evictions occur on the cluster applier thread when shards are
4+
removed from a node. These can be expensive if a large number of shards are
5+
being removed. This change uses the context of the removal to avoid unnecessary
6+
evictions that might hold up the applier thread.
7+
"
8+
area: Snapshot/Restore
9+
type: enhancement
10+
issues: []

server/src/internalClusterTest/java/org/elasticsearch/plugins/IndexFoldersDeletionListenerIT.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.index.IndexSettings;
2323
import org.elasticsearch.index.shard.ShardId;
2424
import org.elasticsearch.indices.IndicesService;
25+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
2526
import org.elasticsearch.test.ESIntegTestCase;
2627
import org.elasticsearch.test.junit.annotations.TestLogging;
2728

@@ -345,12 +346,22 @@ public static class IndexFoldersDeletionListenerPlugin extends Plugin implements
345346
public List<IndexFoldersDeletionListener> getIndexFoldersDeletionListeners() {
346347
return List.of(new IndexFoldersDeletionListener() {
347348
@Override
348-
public void beforeIndexFoldersDeleted(Index index, IndexSettings indexSettings, Path[] indexPaths) {
349+
public void beforeIndexFoldersDeleted(
350+
Index index,
351+
IndexSettings indexSettings,
352+
Path[] indexPaths,
353+
IndexRemovalReason reason
354+
) {
349355
deletedIndices.add(index);
350356
}
351357

352358
@Override
353-
public void beforeShardFoldersDeleted(ShardId shardId, IndexSettings indexSettings, Path[] shardPaths) {
359+
public void beforeShardFoldersDeleted(
360+
ShardId shardId,
361+
IndexSettings indexSettings,
362+
Path[] shardPaths,
363+
IndexRemovalReason reason
364+
) {
354365
deletedShards.computeIfAbsent(shardId.getIndex(), i -> Collections.synchronizedList(new ArrayList<>())).add(shardId);
355366
}
356367
});

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexAliasesService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848

4949
import static java.util.Collections.emptyList;
5050
import static java.util.Collections.emptyMap;
51-
import static org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason.NO_LONGER_ASSIGNED;
51+
import static org.elasticsearch.indices.cluster.IndexRemovalReason.NO_LONGER_ASSIGNED;
5252

5353
/**
5454
* Service responsible for submitting add and remove aliases requests

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383

8484
import static java.util.Collections.emptyMap;
8585
import static org.elasticsearch.cluster.metadata.MetadataCreateDataStreamService.validateTimestampFieldMapping;
86-
import static org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason.NO_LONGER_ASSIGNED;
86+
import static org.elasticsearch.indices.cluster.IndexRemovalReason.NO_LONGER_ASSIGNED;
8787

8888
/**
8989
* Service responsible for submitting index templates updates

server/src/main/java/org/elasticsearch/index/CompositeIndexEventListener.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import org.elasticsearch.index.shard.IndexShard;
2222
import org.elasticsearch.index.shard.IndexShardState;
2323
import org.elasticsearch.index.shard.ShardId;
24-
import org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason;
24+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
2525
import org.elasticsearch.threadpool.ThreadPool;
2626

2727
import java.util.Collection;

server/src/main/java/org/elasticsearch/index/IndexService.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
import org.elasticsearch.index.store.Store;
8484
import org.elasticsearch.index.translog.Translog;
8585
import org.elasticsearch.indices.breaker.CircuitBreakerService;
86+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
8687
import org.elasticsearch.indices.cluster.IndicesClusterStateService;
8788
import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache;
8889
import org.elasticsearch.indices.recovery.RecoveryState;
@@ -494,7 +495,12 @@ public synchronized IndexShard createShard(
494495
nodeEnv,
495496
lock,
496497
this.indexSettings,
497-
shardPaths -> indexFoldersDeletionListener.beforeShardFoldersDeleted(shardId, this.indexSettings, shardPaths)
498+
shardPaths -> indexFoldersDeletionListener.beforeShardFoldersDeleted(
499+
shardId,
500+
this.indexSettings,
501+
shardPaths,
502+
IndexRemovalReason.FAILURE
503+
)
498504
);
499505
path = ShardPath.loadShardPath(logger, nodeEnv, shardId, this.indexSettings.customDataPath());
500506
} catch (Exception inner) {
@@ -704,11 +710,11 @@ private void onShardClose(ShardLock lock) {
704710
try {
705711
eventListener.beforeIndexShardDeleted(lock.getShardId(), indexSettings.getSettings());
706712
} finally {
707-
shardStoreDeleter.deleteShardStore("delete index", lock, indexSettings);
713+
shardStoreDeleter.deleteShardStore("delete index", lock, indexSettings, IndexRemovalReason.DELETED);
708714
eventListener.afterIndexShardDeleted(lock.getShardId(), indexSettings.getSettings());
709715
}
710716
} catch (IOException e) {
711-
shardStoreDeleter.addPendingDelete(lock.getShardId(), indexSettings);
717+
shardStoreDeleter.addPendingDelete(lock.getShardId(), indexSettings, IndexRemovalReason.DELETED);
712718
logger.debug(() -> "[" + lock.getShardId().id() + "] failed to delete shard content - scheduled a retry", e);
713719
}
714720
}
@@ -1062,9 +1068,9 @@ public static Function<String, String> dateMathExpressionResolverAt(long instant
10621068
}
10631069

10641070
public interface ShardStoreDeleter {
1065-
void deleteShardStore(String reason, ShardLock lock, IndexSettings indexSettings) throws IOException;
1071+
void deleteShardStore(String reasonText, ShardLock lock, IndexSettings indexSettings, IndexRemovalReason reason) throws IOException;
10661072

1067-
void addPendingDelete(ShardId shardId, IndexSettings indexSettings);
1073+
void addPendingDelete(ShardId shardId, IndexSettings indexSettings, IndexRemovalReason reason);
10681074
}
10691075

10701076
public final EngineFactory getEngineFactory() {

server/src/main/java/org/elasticsearch/index/shard/IndexEventListener.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import org.elasticsearch.index.Index;
1616
import org.elasticsearch.index.IndexService;
1717
import org.elasticsearch.index.IndexSettings;
18-
import org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason;
18+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
1919

2020
/**
2121
* An index event listener is the primary extension point for plugins and build-in services

server/src/main/java/org/elasticsearch/indices/IndicesService.java

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@
133133
import org.elasticsearch.index.shard.ShardId;
134134
import org.elasticsearch.index.translog.TranslogStats;
135135
import org.elasticsearch.indices.breaker.CircuitBreakerService;
136+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
136137
import org.elasticsearch.indices.cluster.IndicesClusterStateService;
137138
import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache;
138139
import org.elasticsearch.indices.recovery.PeerRecoveryTargetService;
@@ -1021,7 +1022,7 @@ public void removeIndex(
10211022
listener.afterIndexRemoved(indexService.index(), indexSettings, reason);
10221023
if (reason == IndexRemovalReason.DELETED) {
10231024
// now we are done - try to wipe data on disk if possible
1024-
deleteIndexStore(extraInfo, indexService.index(), indexSettings);
1025+
deleteIndexStore(extraInfo, indexService.index(), indexSettings, reason);
10251026
}
10261027
}));
10271028
});
@@ -1105,7 +1106,7 @@ public void deleteUnassignedIndex(String reason, IndexMetadata oldIndexMetadata,
11051106
+ "]"
11061107
);
11071108
}
1108-
deleteIndexStore(reason, oldIndexMetadata);
1109+
deleteIndexStore(reason, oldIndexMetadata, IndexRemovalReason.DELETED);
11091110
} catch (Exception e) {
11101111
logger.warn(() -> format("[%s] failed to delete unassigned index (reason [%s])", oldIndexMetadata.getIndex(), reason), e);
11111112
}
@@ -1118,7 +1119,7 @@ public void deleteUnassignedIndex(String reason, IndexMetadata oldIndexMetadata,
11181119
*
11191120
* Package private for testing
11201121
*/
1121-
void deleteIndexStore(String reason, IndexMetadata metadata) throws IOException {
1122+
void deleteIndexStore(String reasonText, IndexMetadata metadata, IndexRemovalReason reason) throws IOException {
11221123
if (nodeEnv.hasNodeFile()) {
11231124
synchronized (this) {
11241125
Index index = metadata.getIndex();
@@ -1136,33 +1137,35 @@ void deleteIndexStore(String reason, IndexMetadata metadata) throws IOException
11361137
}
11371138
}
11381139
final IndexSettings indexSettings = buildIndexSettings(metadata);
1139-
deleteIndexStore(reason, indexSettings.getIndex(), indexSettings);
1140+
deleteIndexStore(reasonText, indexSettings.getIndex(), indexSettings, reason);
11401141
}
11411142
}
11421143

1143-
private void deleteIndexStore(String reason, Index index, IndexSettings indexSettings) throws IOException {
1144-
deleteIndexStoreIfDeletionAllowed(reason, index, indexSettings, DEFAULT_INDEX_DELETION_PREDICATE);
1144+
private void deleteIndexStore(String reasonText, Index index, IndexSettings indexSettings, IndexRemovalReason reason)
1145+
throws IOException {
1146+
deleteIndexStoreIfDeletionAllowed(reasonText, index, indexSettings, DEFAULT_INDEX_DELETION_PREDICATE, reason);
11451147
}
11461148

11471149
private void deleteIndexStoreIfDeletionAllowed(
1148-
final String reason,
1150+
final String reasonText,
11491151
final Index index,
11501152
final IndexSettings indexSettings,
1151-
final IndexDeletionAllowedPredicate predicate
1153+
final IndexDeletionAllowedPredicate predicate,
1154+
final IndexRemovalReason reason
11521155
) throws IOException {
11531156
boolean success = false;
11541157
try {
11551158
// we are trying to delete the index store here - not a big deal if the lock can't be obtained
11561159
// the store metadata gets wiped anyway even without the lock this is just best effort since
11571160
// every shards deletes its content under the shard lock it owns.
1158-
logger.debug("{} deleting index store reason [{}]", index, reason);
1161+
logger.debug("{} deleting index store reason [{}]", index, reasonText);
11591162
if (predicate.apply(index, indexSettings)) {
11601163
// its safe to delete all index metadata and shard data
11611164
nodeEnv.deleteIndexDirectorySafe(
11621165
index,
11631166
0,
11641167
indexSettings,
1165-
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(index, indexSettings, paths)
1168+
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(index, indexSettings, paths, reason)
11661169
);
11671170
}
11681171
success = true;
@@ -1172,7 +1175,7 @@ private void deleteIndexStoreIfDeletionAllowed(
11721175
logger.warn(() -> format("%s failed to delete index", index), ex);
11731176
} finally {
11741177
if (success == false) {
1175-
addPendingDelete(index, indexSettings);
1178+
addPendingDelete(index, indexSettings, reason);
11761179
}
11771180
// this is a pure protection to make sure this index doesn't get re-imported as a dangling index.
11781181
// we should in the future rather write a tombstone rather than wiping the metadata.
@@ -1182,19 +1185,21 @@ private void deleteIndexStoreIfDeletionAllowed(
11821185

11831186
/**
11841187
* Deletes the shard with an already acquired shard lock.
1185-
* @param reason the reason for the shard deletion
1188+
* @param reasonText the reason for the shard deletion
11861189
* @param lock the lock of the shard to delete
11871190
* @param indexSettings the shards index settings.
1191+
* @param reason the reason for the deletion (as an enum)
11881192
* @throws IOException if an IOException occurs
11891193
*/
11901194
@Override
1191-
public void deleteShardStore(String reason, ShardLock lock, IndexSettings indexSettings) throws IOException {
1195+
public void deleteShardStore(String reasonText, ShardLock lock, IndexSettings indexSettings, IndexRemovalReason reason)
1196+
throws IOException {
11921197
ShardId shardId = lock.getShardId();
1193-
logger.trace("{} deleting shard reason [{}]", shardId, reason);
1198+
logger.trace("{} deleting shard reason [{}]", shardId, reasonText);
11941199
nodeEnv.deleteShardDirectoryUnderLock(
11951200
lock,
11961201
indexSettings,
1197-
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths)
1202+
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths, reason)
11981203
);
11991204
}
12001205

@@ -1206,13 +1211,14 @@ public void deleteShardStore(String reason, ShardLock lock, IndexSettings indexS
12061211
* On data nodes, if the deleted shard is the last shard folder in its index, the method will attempt to remove
12071212
* the index folder as well.
12081213
*
1209-
* @param reason the reason for the shard deletion
1214+
* @param reasonText the reason for the shard deletion
12101215
* @param shardId the shards ID to delete
12111216
* @param clusterState . This is required to access the indexes settings etc.
1217+
* @param reason The reason for the deletion (as an enum)
12121218
* @throws IOException if an IOException occurs
12131219
*/
1214-
public void deleteShardStore(String reason, ShardId shardId, ClusterState clusterState) throws IOException,
1215-
ShardLockObtainFailedException {
1220+
public void deleteShardStore(String reasonText, ShardId shardId, ClusterState clusterState, IndexRemovalReason reason)
1221+
throws IOException, ShardLockObtainFailedException {
12161222
final IndexMetadata metadata = clusterState.getMetadata().getProject().indices().get(shardId.getIndexName());
12171223

12181224
final IndexSettings indexSettings = buildIndexSettings(metadata);
@@ -1223,15 +1229,15 @@ public void deleteShardStore(String reason, ShardId shardId, ClusterState cluste
12231229
nodeEnv.deleteShardDirectorySafe(
12241230
shardId,
12251231
indexSettings,
1226-
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths)
1232+
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths, reason)
12271233
);
1228-
logger.debug("{} deleted shard reason [{}]", shardId, reason);
1234+
logger.debug("{} deleted shard reason [{}]", shardId, reasonText);
12291235

12301236
if (canDeleteIndexContents(shardId.getIndex())) {
12311237
if (nodeEnv.findAllShardIds(shardId.getIndex()).isEmpty()) {
12321238
try {
12331239
// note that deleteIndexStore have more safety checks and may throw an exception if index was concurrently created.
1234-
deleteIndexStore("no longer used", metadata);
1240+
deleteIndexStore("no longer used", metadata, reason);
12351241
} catch (Exception e) {
12361242
// wrap the exception to indicate we already deleted the shard
12371243
throw new ElasticsearchException("failed to delete unused index after deleting its last shard (" + shardId + ")", e);
@@ -1287,7 +1293,7 @@ public IndexMetadata verifyIndexIsDeleted(final Index index, final ClusterState
12871293
}
12881294
final IndexSettings indexSettings = buildIndexSettings(metadata);
12891295
try {
1290-
deleteIndexStoreIfDeletionAllowed("stale deleted index", index, indexSettings, ALWAYS_TRUE);
1296+
deleteIndexStoreIfDeletionAllowed("stale deleted index", index, indexSettings, ALWAYS_TRUE, IndexRemovalReason.DELETED);
12911297
} catch (Exception e) {
12921298
// we just warn about the exception here because if deleteIndexStoreIfDeletionAllowed
12931299
// throws an exception, it gets added to the list of pending deletes to be tried again
@@ -1345,22 +1351,22 @@ private IndexSettings buildIndexSettings(IndexMetadata metadata) {
13451351
* Adds a pending delete for the given index shard.
13461352
*/
13471353
@Override
1348-
public void addPendingDelete(ShardId shardId, IndexSettings settings) {
1354+
public void addPendingDelete(ShardId shardId, IndexSettings settings, IndexRemovalReason reason) {
13491355
if (shardId == null) {
13501356
throw new IllegalArgumentException("shardId must not be null");
13511357
}
13521358
if (settings == null) {
13531359
throw new IllegalArgumentException("settings must not be null");
13541360
}
1355-
PendingDelete pendingDelete = new PendingDelete(shardId, settings);
1361+
PendingDelete pendingDelete = new PendingDelete(shardId, settings, reason);
13561362
addPendingDelete(shardId.getIndex(), pendingDelete);
13571363
}
13581364

13591365
/**
13601366
* Adds a pending delete for the given index.
13611367
*/
1362-
public void addPendingDelete(Index index, IndexSettings settings) {
1363-
PendingDelete pendingDelete = new PendingDelete(index, settings);
1368+
public void addPendingDelete(Index index, IndexSettings settings, IndexRemovalReason reason) {
1369+
PendingDelete pendingDelete = new PendingDelete(index, settings, reason);
13641370
addPendingDelete(index, pendingDelete);
13651371
}
13661372

@@ -1376,25 +1382,28 @@ private static final class PendingDelete implements Comparable<PendingDelete> {
13761382
final int shardId;
13771383
final IndexSettings settings;
13781384
final boolean deleteIndex;
1385+
final IndexRemovalReason reason;
13791386

13801387
/**
13811388
* Creates a new pending delete of an index
13821389
*/
1383-
PendingDelete(ShardId shardId, IndexSettings settings) {
1390+
PendingDelete(ShardId shardId, IndexSettings settings, IndexRemovalReason reason) {
13841391
this.index = shardId.getIndex();
13851392
this.shardId = shardId.getId();
13861393
this.settings = settings;
13871394
this.deleteIndex = false;
1395+
this.reason = reason;
13881396
}
13891397

13901398
/**
13911399
* Creates a new pending delete of a shard
13921400
*/
1393-
PendingDelete(Index index, IndexSettings settings) {
1401+
PendingDelete(Index index, IndexSettings settings, IndexRemovalReason reason) {
13941402
this.index = index;
13951403
this.shardId = -1;
13961404
this.settings = settings;
13971405
this.deleteIndex = true;
1406+
this.reason = reason;
13981407
}
13991408

14001409
@Override
@@ -1458,7 +1467,12 @@ public void processPendingDeletes(Index index, IndexSettings indexSettings, Time
14581467
nodeEnv.deleteIndexDirectoryUnderLock(
14591468
index,
14601469
indexSettings,
1461-
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(index, indexSettings, paths)
1470+
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(
1471+
index,
1472+
indexSettings,
1473+
paths,
1474+
delete.reason
1475+
)
14621476
);
14631477
iterator.remove();
14641478
} catch (IOException ex) {
@@ -1470,7 +1484,7 @@ public void processPendingDeletes(Index index, IndexSettings indexSettings, Time
14701484
final ShardLock shardLock = locks.get(shardId);
14711485
if (shardLock != null) {
14721486
try {
1473-
deleteShardStore("pending delete", shardLock, delete.settings);
1487+
deleteShardStore("pending delete", shardLock, delete.settings, delete.reason);
14741488
iterator.remove();
14751489
} catch (IOException ex) {
14761490
logger.debug(() -> format("%s retry pending delete", shardLock.getShardId()), ex);

0 commit comments

Comments
 (0)