Skip to content

Commit fa531b5

Browse files
authored
Move global checkpoint sync to write threadpool (#96506)
This commit moves the global checkpoint sync action to the write thread pool. Additionally, it routes the global checkpoint sync through the same pathway as the location sync, so that location syncs and global checkpoint syncs work-steal against each other instead of generating independent syncs.
1 parent fe49e4f commit fa531b5

File tree

12 files changed

+211
-90
lines changed

12 files changed

+211
-90
lines changed

server/src/internalClusterTest/java/org/elasticsearch/action/RejectionActionIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
3737
.put("thread_pool.search.size", 1)
3838
.put("thread_pool.search.queue_size", 1)
3939
.put("thread_pool.write.size", 1)
40-
.put("thread_pool.write.queue_size", 1)
40+
// Needs to be 2 since we have concurrent indexing and global checkpoint syncs
41+
.put("thread_pool.write.queue_size", 2)
4142
.put("thread_pool.get.size", 1)
4243
.put("thread_pool.get.queue_size", 1)
4344
.build();

server/src/internalClusterTest/java/org/elasticsearch/index/shard/IndexShardIT.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,10 @@ public void testDurableFlagHasEffect() {
163163
Translog.Location lastWriteLocation = tlog.getLastWriteLocation();
164164
try {
165165
// the lastWriteLocation has an Integer.MAX_VALUE size so we have to create a new one
166-
return tlog.ensureSynced(new Translog.Location(lastWriteLocation.generation, lastWriteLocation.translogLocation, 0));
166+
return tlog.ensureSynced(
167+
new Translog.Location(lastWriteLocation.generation, lastWriteLocation.translogLocation, 0),
168+
SequenceNumbers.UNASSIGNED_SEQ_NO
169+
);
167170
} catch (IOException e) {
168171
throw new UncheckedIOException(e);
169172
}

server/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,11 @@ public enum SearcherScope {
744744
*/
745745
public abstract void asyncEnsureTranslogSynced(Translog.Location location, Consumer<Exception> listener);
746746

747+
/**
748+
* Ensures that the global checkpoint has been persisted to the underlying storage.
749+
*/
750+
public abstract void asyncEnsureGlobalCheckpointSynced(long globalCheckpoint, Consumer<Exception> listener);
751+
747752
public abstract void syncTranslog() throws IOException;
748753

749754
/**

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ public class InternalEngine extends Engine {
171171
private final SoftDeletesPolicy softDeletesPolicy;
172172
private final LastRefreshedCheckpointListener lastRefreshedCheckpointListener;
173173
private final FlushListeners flushListener;
174-
private final AsyncIOProcessor<Translog.Location> translogSyncProcessor;
174+
private final AsyncIOProcessor<Tuple<Long, Translog.Location>> translogSyncProcessor;
175175

176176
private final CompletionStatsCache completionStatsCache;
177177

@@ -602,12 +602,23 @@ public boolean isTranslogSyncNeeded() {
602602
return getTranslog().syncNeeded();
603603
}
604604

605-
private AsyncIOProcessor<Translog.Location> createTranslogSyncProcessor(Logger logger, ThreadContext threadContext) {
605+
private AsyncIOProcessor<Tuple<Long, Translog.Location>> createTranslogSyncProcessor(Logger logger, ThreadContext threadContext) {
606606
return new AsyncIOProcessor<>(logger, 1024, threadContext) {
607607
@Override
608-
protected void write(List<Tuple<Translog.Location, Consumer<Exception>>> candidates) throws IOException {
608+
protected void write(List<Tuple<Tuple<Long, Translog.Location>, Consumer<Exception>>> candidates) throws IOException {
609609
try {
610-
final boolean synced = translog.ensureSynced(candidates.stream().map(Tuple::v1));
610+
Translog.Location location = Translog.Location.EMPTY;
611+
long processGlobalCheckpoint = SequenceNumbers.UNASSIGNED_SEQ_NO;
612+
for (Tuple<Tuple<Long, Translog.Location>, Consumer<Exception>> syncMarkers : candidates) {
613+
Tuple<Long, Translog.Location> marker = syncMarkers.v1();
614+
long globalCheckpointToSync = marker.v1();
615+
if (globalCheckpointToSync != SequenceNumbers.UNASSIGNED_SEQ_NO) {
616+
processGlobalCheckpoint = SequenceNumbers.max(processGlobalCheckpoint, globalCheckpointToSync);
617+
}
618+
location = location.compareTo(marker.v2()) >= 0 ? location : marker.v2();
619+
}
620+
621+
final boolean synced = translog.ensureSynced(location, processGlobalCheckpoint);
611622
if (synced) {
612623
revisitIndexDeletionPolicyOnTranslogSynced();
613624
}
@@ -624,7 +635,12 @@ protected void write(List<Tuple<Translog.Location, Consumer<Exception>>> candida
624635

625636
@Override
626637
public void asyncEnsureTranslogSynced(Translog.Location location, Consumer<Exception> listener) {
627-
translogSyncProcessor.put(location, listener);
638+
translogSyncProcessor.put(new Tuple<>(SequenceNumbers.NO_OPS_PERFORMED, location), listener);
639+
}
640+
641+
@Override
642+
public void asyncEnsureGlobalCheckpointSynced(long globalCheckpoint, Consumer<Exception> listener) {
643+
translogSyncProcessor.put(new Tuple<>(globalCheckpoint, Translog.Location.EMPTY), listener);
628644
}
629645

630646
@Override

server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,11 @@ public void asyncEnsureTranslogSynced(Translog.Location location, Consumer<Excep
337337
listener.accept(null);
338338
}
339339

340+
@Override
341+
public void asyncEnsureGlobalCheckpointSynced(long globalCheckpoint, Consumer<Exception> listener) {
342+
listener.accept(null);
343+
}
344+
340345
@Override
341346
public void syncTranslog() {}
342347

server/src/main/java/org/elasticsearch/index/seqno/GlobalCheckpointSyncAction.java

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ public GlobalCheckpointSyncAction(
6262
actionFilters,
6363
Request::new,
6464
Request::new,
65-
ThreadPool.Names.MANAGEMENT
65+
ThreadPool.Names.WRITE,
66+
false,
67+
true
6668
);
6769
}
6870

@@ -77,24 +79,26 @@ protected void shardOperationOnPrimary(
7779
IndexShard indexShard,
7880
ActionListener<PrimaryResult<Request, ReplicationResponse>> listener
7981
) {
80-
ActionListener.completeWith(listener, () -> {
81-
maybeSyncTranslog(indexShard);
82-
return new PrimaryResult<>(request, new ReplicationResponse());
83-
});
82+
maybeSyncTranslog(indexShard, listener.map(v -> new PrimaryResult<>(request, new ReplicationResponse())));
8483
}
8584

8685
@Override
8786
protected void shardOperationOnReplica(Request shardRequest, IndexShard replica, ActionListener<ReplicaResult> listener) {
88-
ActionListener.completeWith(listener, () -> {
89-
maybeSyncTranslog(replica);
90-
return new ReplicaResult();
91-
});
87+
maybeSyncTranslog(replica, listener.map(v -> new ReplicaResult()));
9288
}
9389

94-
private static void maybeSyncTranslog(final IndexShard indexShard) throws IOException {
90+
private static <T> void maybeSyncTranslog(IndexShard indexShard, ActionListener<Void> listener) {
9591
if (indexShard.getTranslogDurability() == Translog.Durability.REQUEST
9692
&& indexShard.getLastSyncedGlobalCheckpoint() < indexShard.getLastKnownGlobalCheckpoint()) {
97-
indexShard.sync();
93+
indexShard.syncGlobalCheckpoint(indexShard.getLastKnownGlobalCheckpoint(), e -> {
94+
if (e == null) {
95+
listener.onResponse(null);
96+
} else {
97+
listener.onFailure(e);
98+
}
99+
});
100+
} else {
101+
listener.onResponse(null);
98102
}
99103
}
100104

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3611,6 +3611,17 @@ public final void sync(Translog.Location location, Consumer<Exception> syncListe
36113611
getEngine().asyncEnsureTranslogSynced(location, syncListener);
36123612
}
36133613

3614+
/**
3615+
* This method provides the same behavior as #sync but for persisting the global checkpoint. It will initiate a sync
3616+
* if the requested global checkpoint is greater than the currently persisted global checkpoint. However, as with #sync, it
3617+
* will not ensure that the requested global checkpoint is available to be synced. It is the caller's duty to only call this
3618+
* method with a valid processed global checkpoint that is available to sync.
3619+
*/
3620+
public void syncGlobalCheckpoint(long globalCheckpoint, Consumer<Exception> syncListener) {
3621+
verifyNotClosed();
3622+
getEngine().asyncEnsureGlobalCheckpointSynced(globalCheckpoint, syncListener);
3623+
}
3624+
36143625
public void sync() throws IOException {
36153626
verifyNotClosed();
36163627
getEngine().syncTranslog();

server/src/main/java/org/elasticsearch/index/translog/Translog.java

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151
import java.util.Iterator;
5252
import java.util.List;
5353
import java.util.Objects;
54-
import java.util.Optional;
5554
import java.util.OptionalLong;
5655
import java.util.concurrent.atomic.AtomicBoolean;
5756
import java.util.concurrent.locks.ReadWriteLock;
@@ -839,15 +838,18 @@ public void trimOperations(long belowTerm, long aboveSeqNo) throws IOException {
839838
}
840839

841840
/**
842-
* Ensures that the given location has be synced / written to the underlying storage.
841+
* Ensures that the given location and global checkpoint have been synced / written to the underlying storage.
843842
*
844843
* @return Returns <code>true</code> iff this call caused an actual sync operation otherwise <code>false</code>
845844
*/
846-
public boolean ensureSynced(Location location) throws IOException {
845+
public boolean ensureSynced(Location location, long globalCheckpoint) throws IOException {
847846
try (ReleasableLock lock = readLock.acquire()) {
848-
if (location.generation == current.getGeneration()) { // if we have a new one it's already synced
847+
// if we have a new generation and the persisted global checkpoint is greater than or equal to the sync global checkpoint it's
848+
// already synced
849+
long persistedGlobalCheckpoint = current.getLastSyncedCheckpoint().globalCheckpoint;
850+
if (location.generation == current.getGeneration() || persistedGlobalCheckpoint < globalCheckpoint) {
849851
ensureOpen();
850-
return current.syncUpTo(location.translogLocation + location.size);
852+
return current.syncUpTo(location.translogLocation + location.size, globalCheckpoint);
851853
}
852854
} catch (final Exception ex) {
853855
closeOnTragicEvent(ex);
@@ -856,24 +858,6 @@ public boolean ensureSynced(Location location) throws IOException {
856858
return false;
857859
}
858860

859-
/**
860-
* Ensures that all locations in the given stream have been synced / written to the underlying storage.
861-
* This method allows for internal optimization to minimize the amount of fsync operations if multiple
862-
* locations must be synced.
863-
*
864-
* @return Returns <code>true</code> iff this call caused an actual sync operation otherwise <code>false</code>
865-
*/
866-
public boolean ensureSynced(Stream<Location> locations) throws IOException {
867-
final Optional<Location> max = locations.max(Location::compareTo);
868-
// we only need to sync the max location since it will sync all other
869-
// locations implicitly
870-
if (max.isPresent()) {
871-
return ensureSynced(max.get());
872-
} else {
873-
return false;
874-
}
875-
}
876-
877861
/**
878862
* Closes the translog if the current translog writer experienced a tragic exception.
879863
*
@@ -929,6 +913,8 @@ public TranslogDeletionPolicy getDeletionPolicy() {
929913

930914
public static class Location implements Comparable<Location> {
931915

916+
public static Location EMPTY = new Location(0, 0, 0);
917+
932918
public final long generation;
933919
public final long translogLocation;
934920
public final int size;

server/src/main/java/org/elasticsearch/index/translog/TranslogWriter.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ synchronized boolean assertNoSeqAbove(long belowTerm, long aboveSeqNo) {
339339
* raising the exception.
340340
*/
341341
public void sync() throws IOException {
342-
syncUpTo(Long.MAX_VALUE);
342+
syncUpTo(Long.MAX_VALUE, SequenceNumbers.UNASSIGNED_SEQ_NO);
343343
}
344344

345345
/**
@@ -455,10 +455,17 @@ private long getWrittenOffset() throws IOException {
455455
*
456456
* @return <code>true</code> if this call caused an actual sync operation
457457
*/
458-
final boolean syncUpTo(long offset) throws IOException {
459-
if (lastSyncedCheckpoint.offset < offset && syncNeeded()) {
458+
final boolean syncUpTo(long offset, long globalCheckpointToPersist) throws IOException {
459+
if ((lastSyncedCheckpoint.offset < offset || lastSyncedCheckpoint.globalCheckpoint < globalCheckpointToPersist) && syncNeeded()) {
460+
assert globalCheckpointToPersist <= globalCheckpointSupplier.getAsLong()
461+
: "globalCheckpointToPersist ["
462+
+ globalCheckpointToPersist
463+
+ "] greater than global checkpoint ["
464+
+ globalCheckpointSupplier.getAsLong()
465+
+ "]";
460466
synchronized (syncLock) { // only one sync/checkpoint should happen concurrently but we wait
461-
if (lastSyncedCheckpoint.offset < offset && syncNeeded()) {
467+
if ((lastSyncedCheckpoint.offset < offset || lastSyncedCheckpoint.globalCheckpoint < globalCheckpointToPersist)
468+
&& syncNeeded()) {
462469
// double checked locking - we don't want to fsync unless we have to and now that we have
463470
// the lock we should check again since if this code is busy we might have fsynced enough already
464471
final Checkpoint checkpointToSync;

server/src/test/java/org/elasticsearch/index/seqno/GlobalCheckpointSyncActionTests.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,13 @@
2727
import org.elasticsearch.transport.TransportService;
2828

2929
import java.util.Collections;
30+
import java.util.function.Consumer;
3031

3132
import static org.elasticsearch.test.ClusterServiceUtils.createClusterService;
33+
import static org.mockito.ArgumentMatchers.any;
34+
import static org.mockito.ArgumentMatchers.anyLong;
35+
import static org.mockito.ArgumentMatchers.eq;
36+
import static org.mockito.Mockito.doAnswer;
3237
import static org.mockito.Mockito.mock;
3338
import static org.mockito.Mockito.never;
3439
import static org.mockito.Mockito.verify;
@@ -100,6 +105,11 @@ public void testTranslogSyncAfterGlobalCheckpointSync() throws Exception {
100105

101106
when(indexShard.getLastKnownGlobalCheckpoint()).thenReturn(globalCheckpoint);
102107
when(indexShard.getLastSyncedGlobalCheckpoint()).thenReturn(lastSyncedGlobalCheckpoint);
108+
doAnswer(invocation -> {
109+
Consumer<Exception> argument = invocation.getArgument(1);
110+
argument.accept(null);
111+
return null;
112+
}).when(indexShard).syncGlobalCheckpoint(anyLong(), any());
103113

104114
final GlobalCheckpointSyncAction action = new GlobalCheckpointSyncAction(
105115
Settings.EMPTY,
@@ -123,9 +133,10 @@ public void testTranslogSyncAfterGlobalCheckpointSync() throws Exception {
123133

124134
if (durability == Translog.Durability.ASYNC || lastSyncedGlobalCheckpoint == globalCheckpoint) {
125135
verify(indexShard, never()).sync();
136+
verify(indexShard, never()).syncGlobalCheckpoint(anyLong(), any());
126137
} else {
127-
verify(indexShard).sync();
138+
verify(indexShard, never()).sync();
139+
verify(indexShard).syncGlobalCheckpoint(eq(globalCheckpoint), any());
128140
}
129141
}
130-
131142
}

0 commit comments

Comments
 (0)