Skip to content

Commit bad9277

Browse files
authored
Optimise shared-blob-cache evictions (elastic#128539)
* Optimise shared-blob-cache evictions (elastic#126581) Closes: ES-10744 Co-authored-by: Tanguy Leroux <[email protected]> (cherry picked from commit 83fe2ed) * Update docs/changelog/128539.yaml * Fix changelogs * Delete docs/changelog/128539.yaml * Restore original changelog
1 parent 149a674 commit bad9277

File tree

26 files changed

+567
-129
lines changed

26 files changed

+567
-129
lines changed

docs/changelog/126581.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pr: 126581
2+
summary: "Optimize shared blob cache evictions on shard removal
3+
Shared blob cache evictions occur on the cluster applier thread when shards are
4+
removed from a node. These can be expensive if a large number of shards are
5+
being removed. This change uses the context of the removal to avoid unnecessary
6+
evictions that might hold up the applier thread.
7+
"
8+
area: Snapshot/Restore
9+
type: enhancement
10+
issues: []

server/src/internalClusterTest/java/org/elasticsearch/plugins/IndexFoldersDeletionListenerIT.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.index.IndexSettings;
2323
import org.elasticsearch.index.shard.ShardId;
2424
import org.elasticsearch.indices.IndicesService;
25+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
2526
import org.elasticsearch.test.ESIntegTestCase;
2627
import org.elasticsearch.test.junit.annotations.TestLogging;
2728

@@ -345,12 +346,22 @@ public static class IndexFoldersDeletionListenerPlugin extends Plugin implements
345346
public List<IndexFoldersDeletionListener> getIndexFoldersDeletionListeners() {
346347
return List.of(new IndexFoldersDeletionListener() {
347348
@Override
348-
public void beforeIndexFoldersDeleted(Index index, IndexSettings indexSettings, Path[] indexPaths) {
349+
public void beforeIndexFoldersDeleted(
350+
Index index,
351+
IndexSettings indexSettings,
352+
Path[] indexPaths,
353+
IndexRemovalReason reason
354+
) {
349355
deletedIndices.add(index);
350356
}
351357

352358
@Override
353-
public void beforeShardFoldersDeleted(ShardId shardId, IndexSettings indexSettings, Path[] shardPaths) {
359+
public void beforeShardFoldersDeleted(
360+
ShardId shardId,
361+
IndexSettings indexSettings,
362+
Path[] shardPaths,
363+
IndexRemovalReason reason
364+
) {
354365
deletedShards.computeIfAbsent(shardId.getIndex(), i -> Collections.synchronizedList(new ArrayList<>())).add(shardId);
355366
}
356367
});

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexAliasesService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747

4848
import static java.util.Collections.emptyList;
4949
import static java.util.Collections.emptyMap;
50-
import static org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason.NO_LONGER_ASSIGNED;
50+
import static org.elasticsearch.indices.cluster.IndexRemovalReason.NO_LONGER_ASSIGNED;
5151

5252
/**
5353
* Service responsible for submitting add and remove aliases requests

server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@
8282

8383
import static java.util.Collections.emptyMap;
8484
import static org.elasticsearch.cluster.metadata.MetadataCreateDataStreamService.validateTimestampFieldMapping;
85-
import static org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason.NO_LONGER_ASSIGNED;
85+
import static org.elasticsearch.indices.cluster.IndexRemovalReason.NO_LONGER_ASSIGNED;
8686

8787
/**
8888
* Service responsible for submitting index templates updates

server/src/main/java/org/elasticsearch/index/CompositeIndexEventListener.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import org.elasticsearch.index.shard.IndexShard;
2222
import org.elasticsearch.index.shard.IndexShardState;
2323
import org.elasticsearch.index.shard.ShardId;
24-
import org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason;
24+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
2525
import org.elasticsearch.threadpool.ThreadPool;
2626

2727
import java.util.Collection;

server/src/main/java/org/elasticsearch/index/IndexService.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
import org.elasticsearch.index.store.Store;
8282
import org.elasticsearch.index.translog.Translog;
8383
import org.elasticsearch.indices.breaker.CircuitBreakerService;
84+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
8485
import org.elasticsearch.indices.cluster.IndicesClusterStateService;
8586
import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache;
8687
import org.elasticsearch.indices.recovery.RecoveryState;
@@ -485,7 +486,12 @@ public synchronized IndexShard createShard(
485486
nodeEnv,
486487
lock,
487488
this.indexSettings,
488-
shardPaths -> indexFoldersDeletionListener.beforeShardFoldersDeleted(shardId, this.indexSettings, shardPaths)
489+
shardPaths -> indexFoldersDeletionListener.beforeShardFoldersDeleted(
490+
shardId,
491+
this.indexSettings,
492+
shardPaths,
493+
IndexRemovalReason.FAILURE
494+
)
489495
);
490496
path = ShardPath.loadShardPath(logger, nodeEnv, shardId, this.indexSettings.customDataPath());
491497
} catch (Exception inner) {
@@ -693,11 +699,11 @@ private void onShardClose(ShardLock lock) {
693699
try {
694700
eventListener.beforeIndexShardDeleted(lock.getShardId(), indexSettings.getSettings());
695701
} finally {
696-
shardStoreDeleter.deleteShardStore("delete index", lock, indexSettings);
702+
shardStoreDeleter.deleteShardStore("delete index", lock, indexSettings, IndexRemovalReason.DELETED);
697703
eventListener.afterIndexShardDeleted(lock.getShardId(), indexSettings.getSettings());
698704
}
699705
} catch (IOException e) {
700-
shardStoreDeleter.addPendingDelete(lock.getShardId(), indexSettings);
706+
shardStoreDeleter.addPendingDelete(lock.getShardId(), indexSettings, IndexRemovalReason.DELETED);
701707
logger.debug(() -> "[" + lock.getShardId().id() + "] failed to delete shard content - scheduled a retry", e);
702708
}
703709
}
@@ -1043,9 +1049,9 @@ public static Function<String, String> dateMathExpressionResolverAt(long instant
10431049
}
10441050

10451051
public interface ShardStoreDeleter {
1046-
void deleteShardStore(String reason, ShardLock lock, IndexSettings indexSettings) throws IOException;
1052+
void deleteShardStore(String reasonText, ShardLock lock, IndexSettings indexSettings, IndexRemovalReason reason) throws IOException;
10471053

1048-
void addPendingDelete(ShardId shardId, IndexSettings indexSettings);
1054+
void addPendingDelete(ShardId shardId, IndexSettings indexSettings, IndexRemovalReason reason);
10491055
}
10501056

10511057
public final EngineFactory getEngineFactory() {

server/src/main/java/org/elasticsearch/index/shard/IndexEventListener.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import org.elasticsearch.index.Index;
1616
import org.elasticsearch.index.IndexService;
1717
import org.elasticsearch.index.IndexSettings;
18-
import org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason;
18+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
1919

2020
/**
2121
* An index event listener is the primary extension point for plugins and build-in services

server/src/main/java/org/elasticsearch/indices/IndicesService.java

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@
128128
import org.elasticsearch.index.shard.SearchOperationListener;
129129
import org.elasticsearch.index.shard.ShardId;
130130
import org.elasticsearch.indices.breaker.CircuitBreakerService;
131+
import org.elasticsearch.indices.cluster.IndexRemovalReason;
131132
import org.elasticsearch.indices.cluster.IndicesClusterStateService;
132133
import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache;
133134
import org.elasticsearch.indices.recovery.PeerRecoveryTargetService;
@@ -966,7 +967,7 @@ public void removeIndex(
966967
listener.afterIndexRemoved(indexService.index(), indexSettings, reason);
967968
if (reason == IndexRemovalReason.DELETED) {
968969
// now we are done - try to wipe data on disk if possible
969-
deleteIndexStore(extraInfo, indexService.index(), indexSettings);
970+
deleteIndexStore(extraInfo, indexService.index(), indexSettings, reason);
970971
}
971972
}));
972973
});
@@ -1038,7 +1039,7 @@ public void deleteUnassignedIndex(String reason, IndexMetadata oldIndexMetadata,
10381039
+ "]"
10391040
);
10401041
}
1041-
deleteIndexStore(reason, oldIndexMetadata);
1042+
deleteIndexStore(reason, oldIndexMetadata, IndexRemovalReason.DELETED);
10421043
} catch (Exception e) {
10431044
logger.warn(() -> format("[%s] failed to delete unassigned index (reason [%s])", oldIndexMetadata.getIndex(), reason), e);
10441045
}
@@ -1051,7 +1052,7 @@ public void deleteUnassignedIndex(String reason, IndexMetadata oldIndexMetadata,
10511052
*
10521053
* Package private for testing
10531054
*/
1054-
void deleteIndexStore(String reason, IndexMetadata metadata) throws IOException {
1055+
void deleteIndexStore(String reasonText, IndexMetadata metadata, IndexRemovalReason reason) throws IOException {
10551056
if (nodeEnv.hasNodeFile()) {
10561057
synchronized (this) {
10571058
Index index = metadata.getIndex();
@@ -1069,33 +1070,35 @@ void deleteIndexStore(String reason, IndexMetadata metadata) throws IOException
10691070
}
10701071
}
10711072
final IndexSettings indexSettings = buildIndexSettings(metadata);
1072-
deleteIndexStore(reason, indexSettings.getIndex(), indexSettings);
1073+
deleteIndexStore(reasonText, indexSettings.getIndex(), indexSettings, reason);
10731074
}
10741075
}
10751076

1076-
private void deleteIndexStore(String reason, Index index, IndexSettings indexSettings) throws IOException {
1077-
deleteIndexStoreIfDeletionAllowed(reason, index, indexSettings, DEFAULT_INDEX_DELETION_PREDICATE);
1077+
private void deleteIndexStore(String reasonText, Index index, IndexSettings indexSettings, IndexRemovalReason reason)
1078+
throws IOException {
1079+
deleteIndexStoreIfDeletionAllowed(reasonText, index, indexSettings, DEFAULT_INDEX_DELETION_PREDICATE, reason);
10781080
}
10791081

10801082
private void deleteIndexStoreIfDeletionAllowed(
1081-
final String reason,
1083+
final String reasonText,
10821084
final Index index,
10831085
final IndexSettings indexSettings,
1084-
final IndexDeletionAllowedPredicate predicate
1086+
final IndexDeletionAllowedPredicate predicate,
1087+
final IndexRemovalReason reason
10851088
) throws IOException {
10861089
boolean success = false;
10871090
try {
10881091
// we are trying to delete the index store here - not a big deal if the lock can't be obtained
10891092
// the store metadata gets wiped anyway even without the lock this is just best effort since
10901093
// every shards deletes its content under the shard lock it owns.
1091-
logger.debug("{} deleting index store reason [{}]", index, reason);
1094+
logger.debug("{} deleting index store reason [{}]", index, reasonText);
10921095
if (predicate.apply(index, indexSettings)) {
10931096
// its safe to delete all index metadata and shard data
10941097
nodeEnv.deleteIndexDirectorySafe(
10951098
index,
10961099
0,
10971100
indexSettings,
1098-
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(index, indexSettings, paths)
1101+
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(index, indexSettings, paths, reason)
10991102
);
11001103
}
11011104
success = true;
@@ -1105,7 +1108,7 @@ private void deleteIndexStoreIfDeletionAllowed(
11051108
logger.warn(() -> format("%s failed to delete index", index), ex);
11061109
} finally {
11071110
if (success == false) {
1108-
addPendingDelete(index, indexSettings);
1111+
addPendingDelete(index, indexSettings, reason);
11091112
}
11101113
// this is a pure protection to make sure this index doesn't get re-imported as a dangling index.
11111114
// we should in the future rather write a tombstone rather than wiping the metadata.
@@ -1115,19 +1118,21 @@ private void deleteIndexStoreIfDeletionAllowed(
11151118

11161119
/**
11171120
* Deletes the shard with an already acquired shard lock.
1118-
* @param reason the reason for the shard deletion
1121+
* @param reasonText the reason for the shard deletion
11191122
* @param lock the lock of the shard to delete
11201123
* @param indexSettings the shards index settings.
1124+
* @param reason the reason for the deletion (as an enum)
11211125
* @throws IOException if an IOException occurs
11221126
*/
11231127
@Override
1124-
public void deleteShardStore(String reason, ShardLock lock, IndexSettings indexSettings) throws IOException {
1128+
public void deleteShardStore(String reasonText, ShardLock lock, IndexSettings indexSettings, IndexRemovalReason reason)
1129+
throws IOException {
11251130
ShardId shardId = lock.getShardId();
1126-
logger.trace("{} deleting shard reason [{}]", shardId, reason);
1131+
logger.trace("{} deleting shard reason [{}]", shardId, reasonText);
11271132
nodeEnv.deleteShardDirectoryUnderLock(
11281133
lock,
11291134
indexSettings,
1130-
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths)
1135+
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths, reason)
11311136
);
11321137
}
11331138

@@ -1139,13 +1144,14 @@ public void deleteShardStore(String reason, ShardLock lock, IndexSettings indexS
11391144
* On data nodes, if the deleted shard is the last shard folder in its index, the method will attempt to remove
11401145
* the index folder as well.
11411146
*
1142-
* @param reason the reason for the shard deletion
1147+
* @param reasonText the reason for the shard deletion
11431148
* @param shardId the shards ID to delete
11441149
* @param clusterState . This is required to access the indexes settings etc.
1150+
* @param reason The reason for the deletion (as an enum)
11451151
* @throws IOException if an IOException occurs
11461152
*/
1147-
public void deleteShardStore(String reason, ShardId shardId, ClusterState clusterState) throws IOException,
1148-
ShardLockObtainFailedException {
1153+
public void deleteShardStore(String reasonText, ShardId shardId, ClusterState clusterState, IndexRemovalReason reason)
1154+
throws IOException, ShardLockObtainFailedException {
11491155
final IndexMetadata metadata = clusterState.getMetadata().indices().get(shardId.getIndexName());
11501156

11511157
final IndexSettings indexSettings = buildIndexSettings(metadata);
@@ -1156,15 +1162,15 @@ public void deleteShardStore(String reason, ShardId shardId, ClusterState cluste
11561162
nodeEnv.deleteShardDirectorySafe(
11571163
shardId,
11581164
indexSettings,
1159-
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths)
1165+
paths -> indexFoldersDeletionListeners.beforeShardFoldersDeleted(shardId, indexSettings, paths, reason)
11601166
);
1161-
logger.debug("{} deleted shard reason [{}]", shardId, reason);
1167+
logger.debug("{} deleted shard reason [{}]", shardId, reasonText);
11621168

11631169
if (canDeleteIndexContents(shardId.getIndex())) {
11641170
if (nodeEnv.findAllShardIds(shardId.getIndex()).isEmpty()) {
11651171
try {
11661172
// note that deleteIndexStore have more safety checks and may throw an exception if index was concurrently created.
1167-
deleteIndexStore("no longer used", metadata);
1173+
deleteIndexStore("no longer used", metadata, reason);
11681174
} catch (Exception e) {
11691175
// wrap the exception to indicate we already deleted the shard
11701176
throw new ElasticsearchException("failed to delete unused index after deleting its last shard (" + shardId + ")", e);
@@ -1220,7 +1226,7 @@ public IndexMetadata verifyIndexIsDeleted(final Index index, final ClusterState
12201226
}
12211227
final IndexSettings indexSettings = buildIndexSettings(metadata);
12221228
try {
1223-
deleteIndexStoreIfDeletionAllowed("stale deleted index", index, indexSettings, ALWAYS_TRUE);
1229+
deleteIndexStoreIfDeletionAllowed("stale deleted index", index, indexSettings, ALWAYS_TRUE, IndexRemovalReason.DELETED);
12241230
} catch (Exception e) {
12251231
// we just warn about the exception here because if deleteIndexStoreIfDeletionAllowed
12261232
// throws an exception, it gets added to the list of pending deletes to be tried again
@@ -1278,22 +1284,22 @@ private IndexSettings buildIndexSettings(IndexMetadata metadata) {
12781284
* Adds a pending delete for the given index shard.
12791285
*/
12801286
@Override
1281-
public void addPendingDelete(ShardId shardId, IndexSettings settings) {
1287+
public void addPendingDelete(ShardId shardId, IndexSettings settings, IndexRemovalReason reason) {
12821288
if (shardId == null) {
12831289
throw new IllegalArgumentException("shardId must not be null");
12841290
}
12851291
if (settings == null) {
12861292
throw new IllegalArgumentException("settings must not be null");
12871293
}
1288-
PendingDelete pendingDelete = new PendingDelete(shardId, settings);
1294+
PendingDelete pendingDelete = new PendingDelete(shardId, settings, reason);
12891295
addPendingDelete(shardId.getIndex(), pendingDelete);
12901296
}
12911297

12921298
/**
12931299
* Adds a pending delete for the given index.
12941300
*/
1295-
public void addPendingDelete(Index index, IndexSettings settings) {
1296-
PendingDelete pendingDelete = new PendingDelete(index, settings);
1301+
public void addPendingDelete(Index index, IndexSettings settings, IndexRemovalReason reason) {
1302+
PendingDelete pendingDelete = new PendingDelete(index, settings, reason);
12971303
addPendingDelete(index, pendingDelete);
12981304
}
12991305

@@ -1309,25 +1315,28 @@ private static final class PendingDelete implements Comparable<PendingDelete> {
13091315
final int shardId;
13101316
final IndexSettings settings;
13111317
final boolean deleteIndex;
1318+
final IndexRemovalReason reason;
13121319

13131320
/**
13141321
* Creates a new pending delete of an index
13151322
*/
1316-
PendingDelete(ShardId shardId, IndexSettings settings) {
1323+
PendingDelete(ShardId shardId, IndexSettings settings, IndexRemovalReason reason) {
13171324
this.index = shardId.getIndex();
13181325
this.shardId = shardId.getId();
13191326
this.settings = settings;
13201327
this.deleteIndex = false;
1328+
this.reason = reason;
13211329
}
13221330

13231331
/**
13241332
* Creates a new pending delete of a shard
13251333
*/
1326-
PendingDelete(Index index, IndexSettings settings) {
1334+
PendingDelete(Index index, IndexSettings settings, IndexRemovalReason reason) {
13271335
this.index = index;
13281336
this.shardId = -1;
13291337
this.settings = settings;
13301338
this.deleteIndex = true;
1339+
this.reason = reason;
13311340
}
13321341

13331342
@Override
@@ -1391,7 +1400,12 @@ public void processPendingDeletes(Index index, IndexSettings indexSettings, Time
13911400
nodeEnv.deleteIndexDirectoryUnderLock(
13921401
index,
13931402
indexSettings,
1394-
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(index, indexSettings, paths)
1403+
paths -> indexFoldersDeletionListeners.beforeIndexFoldersDeleted(
1404+
index,
1405+
indexSettings,
1406+
paths,
1407+
delete.reason
1408+
)
13951409
);
13961410
iterator.remove();
13971411
} catch (IOException ex) {
@@ -1403,7 +1417,7 @@ public void processPendingDeletes(Index index, IndexSettings indexSettings, Time
14031417
final ShardLock shardLock = locks.get(shardId);
14041418
if (shardLock != null) {
14051419
try {
1406-
deleteShardStore("pending delete", shardLock, delete.settings);
1420+
deleteShardStore("pending delete", shardLock, delete.settings, delete.reason);
14071421
iterator.remove();
14081422
} catch (IOException ex) {
14091423
logger.debug(() -> format("%s retry pending delete", shardLock.getShardId()), ex);

0 commit comments

Comments
 (0)