Skip to content

Commit d93d333

Browse files
authored
Remove checking of sync commit ids (#114246)
A Lucene commit doesn't contain sync ids `SegmentInfos` anymore, so we can't rely on them during recovery. The fields was marked as deprecated in #102343.
1 parent d593194 commit d93d333

File tree

18 files changed

+41
-261
lines changed

18 files changed

+41
-261
lines changed

docs/reference/cat/shards.asciidoc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@
99
====
1010
cat APIs are only intended for human consumption using the command line or {kib}
1111
console.
12-
They are _not_ intended for use by applications. For application
12+
They are _not_ intended for use by applications. For application
1313
consumption, use the <<cluster-state,cluster state API>>.
1414
====
1515

1616
The `shards` command is the detailed view of all nodes' shard <<shard-allocation-relocation-recovery,allocation>>.
17-
It will tell you if the shard is a primary or replica, the number of docs, the
18-
bytes it takes on disk, the node where it's located, and if the shard is
17+
It will tell you if the shard is a primary or replica, the number of docs, the
18+
bytes it takes on disk, the node where it's located, and if the shard is
1919
currently <<shard-allocation-relocation-recovery,recovering>>.
2020

2121
For <<data-streams,data streams>>, the API returns information about the stream's backing indices.
@@ -258,9 +258,6 @@ Time spent in suggest, such as `0`.
258258
`suggest.total`, `suto`, `suggestTotal`::
259259
Number of suggest operations, such as `0`.
260260

261-
`sync_id`::
262-
Sync ID of the shard.
263-
264261
`unassigned.at`, `ua`::
265262
Time at which the shard became unassigned in
266263
{wikipedia}/List_of_UTC_time_offsets[Coordinated Universal Time (UTC)].

rest-api-spec/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
6565
task.skipTest("indices.create/20_synthetic_source/nested object with unmapped fields", "temporary until backported")
6666
task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "temporary until backported")
6767
task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy")
68+
task.skipTest("cat.shards/10_basic/Help", "sync_id is removed in 9.0")
6869
})

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cat.shards/10_basic.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
ip .+ \n
2020
id .+ \n
2121
node .+ \n
22-
sync_id .+ \n
2322
unassigned.reason .+ \n
2423
unassigned.at .+ \n
2524
unassigned.for .+ \n

server/src/internalClusterTest/java/org/elasticsearch/gateway/RecoveryFromGatewayIT.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
import org.elasticsearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction;
1515
import org.elasticsearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction;
1616
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
17-
import org.elasticsearch.action.admin.indices.stats.IndexStats;
18-
import org.elasticsearch.action.admin.indices.stats.ShardStats;
1917
import org.elasticsearch.action.support.ActionTestUtils;
2018
import org.elasticsearch.cluster.ClusterState;
2119
import org.elasticsearch.cluster.coordination.ElectionSchedulerFactory;
@@ -30,7 +28,6 @@
3028
import org.elasticsearch.index.IndexService;
3129
import org.elasticsearch.index.IndexSettings;
3230
import org.elasticsearch.index.MergePolicyConfig;
33-
import org.elasticsearch.index.engine.Engine;
3431
import org.elasticsearch.index.query.QueryBuilders;
3532
import org.elasticsearch.index.shard.ShardId;
3633
import org.elasticsearch.index.shard.ShardPath;
@@ -577,13 +574,6 @@ public Settings onNodeStopped(String nodeName) throws Exception {
577574
}
578575
}
579576

580-
public void assertSyncIdsNotNull() {
581-
IndexStats indexStats = indicesAdmin().prepareStats("test").get().getIndex("test");
582-
for (ShardStats shardStats : indexStats.getShards()) {
583-
assertNotNull(shardStats.getCommitStats().getUserData().get(Engine.SYNC_COMMIT_ID));
584-
}
585-
}
586-
587577
public void testStartedShardFoundIfStateNotYetProcessed() throws Exception {
588578
// nodes may need to report the shards they processed the initial recovered cluster state from the master
589579
final String nodeName = internalCluster().startNode();

server/src/main/java/org/elasticsearch/cluster/routing/allocation/NodeAllocationResult.java

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -210,19 +210,8 @@ public String getAllocationId() {
210210
return allocationId;
211211
}
212212

213-
/**
214-
* Returns {@code true} if the shard copy has a matching sync id with the primary shard.
215-
* Returns {@code false} if the shard copy does not have a matching sync id with the primary
216-
* shard, or this explanation pertains to the allocation of a primary shard, in which case
217-
* matching sync ids are irrelevant.
218-
*/
219-
public boolean hasMatchingSyncId() {
220-
return matchingBytes == Long.MAX_VALUE;
221-
}
222-
223213
/**
224214
* Gets the number of matching bytes the shard copy has with the primary shard.
225-
* Returns {@code Long.MAX_VALUE} if {@link #hasMatchingSyncId()} returns {@code true}.
226215
* Returns -1 if not applicable (this value only applies to assigning replica shards).
227216
*/
228217
public long getMatchingBytes() {
@@ -263,11 +252,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
263252
builder.field("allocation_id", allocationId);
264253
}
265254
if (matchingBytes >= 0) {
266-
if (hasMatchingSyncId()) {
267-
builder.field("matching_sync_id", true);
268-
} else {
269-
builder.humanReadableField("matching_size_in_bytes", "matching_size", ByteSizeValue.ofBytes(matchingBytes));
270-
}
255+
builder.humanReadableField("matching_size_in_bytes", "matching_size", ByteSizeValue.ofBytes(matchingBytes));
271256
}
272257
if (storeException != null) {
273258
builder.startObject("store_exception");

server/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -439,14 +439,6 @@ private static long computeMatchingBytes(
439439
return sizeMatched;
440440
}
441441

442-
private static boolean hasMatchingSyncId(
443-
TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore,
444-
TransportNodesListShardStoreMetadata.StoreFilesMetadata replicaStore
445-
) {
446-
String primarySyncId = primaryStore.syncId();
447-
return primarySyncId != null && primarySyncId.equals(replicaStore.syncId());
448-
}
449-
450442
private static MatchingNode computeMatchingNode(
451443
DiscoveryNode primaryNode,
452444
TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore,
@@ -455,8 +447,7 @@ private static MatchingNode computeMatchingNode(
455447
) {
456448
final long retainingSeqNoForPrimary = primaryStore.getPeerRecoveryRetentionLeaseRetainingSeqNo(primaryNode);
457449
final long retainingSeqNoForReplica = primaryStore.getPeerRecoveryRetentionLeaseRetainingSeqNo(replicaNode);
458-
final boolean isNoopRecovery = (retainingSeqNoForReplica >= retainingSeqNoForPrimary && retainingSeqNoForPrimary >= 0)
459-
|| hasMatchingSyncId(primaryStore, replicaStore);
450+
final boolean isNoopRecovery = (retainingSeqNoForReplica >= retainingSeqNoForPrimary && retainingSeqNoForPrimary >= 0);
460451
final long matchingBytes = computeMatchingBytes(primaryStore, replicaStore);
461452
return new MatchingNode(matchingBytes, retainingSeqNoForReplica, isNoopRecovery);
462453
}
@@ -470,9 +461,6 @@ private static boolean canPerformOperationBasedRecovery(
470461
if (targetNodeStore == null || targetNodeStore.storeFilesMetadata().isEmpty()) {
471462
return false;
472463
}
473-
if (hasMatchingSyncId(primaryStore, targetNodeStore.storeFilesMetadata())) {
474-
return true;
475-
}
476464
return primaryStore.getPeerRecoveryRetentionLeaseRetainingSeqNo(targetNode) >= 0;
477465
}
478466

server/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
import org.elasticsearch.core.Releasable;
5959
import org.elasticsearch.core.Releasables;
6060
import org.elasticsearch.core.TimeValue;
61-
import org.elasticsearch.core.UpdateForV9;
6261
import org.elasticsearch.index.IndexVersion;
6362
import org.elasticsearch.index.VersionType;
6463
import org.elasticsearch.index.mapper.DocumentParser;
@@ -117,8 +116,6 @@
117116

118117
public abstract class Engine implements Closeable {
119118

120-
@UpdateForV9(owner = UpdateForV9.Owner.DISTRIBUTED_INDEXING) // TODO: Remove sync_id in 9.0
121-
public static final String SYNC_COMMIT_ID = "sync_id";
122119
public static final String HISTORY_UUID_KEY = "history_uuid";
123120
public static final String FORCE_MERGE_UUID_KEY = "force_merge_uuid";
124121
public static final String MIN_RETAINED_SEQNO = "min_retained_seq_no";

server/src/main/java/org/elasticsearch/index/store/Store.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,14 +1150,6 @@ private int numSegmentFiles() { // only for asserts
11501150
return count;
11511151
}
11521152

1153-
/**
1154-
* Returns the sync id of the commit point that this MetadataSnapshot represents.
1155-
*
1156-
* @return sync id if exists, else null
1157-
*/
1158-
public String getSyncId() {
1159-
return commitUserData.get(Engine.SYNC_COMMIT_ID);
1160-
}
11611153
}
11621154

11631155
/**

server/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java

Lines changed: 20 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -534,56 +534,30 @@ void phase1(IndexCommit snapshot, long startingSeqNo, IntSupplier translogOps, A
534534
);
535535
}
536536
}
537-
// When sync ids were used we could use them to check if two shard copies were equivalent,
538-
// if that's the case we can skip sending files from the source shard to the target shard.
539537
// If the shard uses the current replication mechanism, we have to compute the recovery plan,
540538
// and it is still possible to skip the sending files from the source shard to the target shard
541539
// using a different mechanism to determine it.
542-
// TODO: is this still relevant today?
543-
if (hasSameLegacySyncId(recoverySourceMetadata, request.metadataSnapshot()) == false) {
544-
cancellableThreads.checkForCancel();
545-
SubscribableListener
546-
// compute the plan
547-
.<ShardRecoveryPlan>newForked(
548-
l -> recoveryPlannerService.computeRecoveryPlan(
549-
shard.shardId(),
550-
shardStateIdentifier,
551-
recoverySourceMetadata,
552-
request.metadataSnapshot(),
553-
startingSeqNo,
554-
translogOps.getAsInt(),
555-
getRequest().targetNode().getMaxIndexVersion(),
556-
canUseSnapshots(),
557-
request.isPrimaryRelocation(),
558-
l
559-
)
540+
cancellableThreads.checkForCancel();
541+
SubscribableListener
542+
// compute the plan
543+
.<ShardRecoveryPlan>newForked(
544+
l -> recoveryPlannerService.computeRecoveryPlan(
545+
shard.shardId(),
546+
shardStateIdentifier,
547+
recoverySourceMetadata,
548+
request.metadataSnapshot(),
549+
startingSeqNo,
550+
translogOps.getAsInt(),
551+
getRequest().targetNode().getMaxIndexVersion(),
552+
canUseSnapshots(),
553+
request.isPrimaryRelocation(),
554+
l
560555
)
561-
// perform the file recovery
562-
.<SendFileResult>andThen((l, plan) -> recoverFilesFromSourceAndSnapshot(plan, store, stopWatch, l))
563-
// and respond
564-
.addListener(listener);
565-
} else {
566-
logger.trace("skipping [phase1] since source and target have identical sync id [{}]", recoverySourceMetadata.getSyncId());
567-
SubscribableListener
568-
// but we must still create a retention lease
569-
.<RetentionLease>newForked(leaseListener -> createRetentionLease(startingSeqNo, leaseListener))
570-
// and then compute the result of sending no files
571-
.andThenApply(ignored -> {
572-
final TimeValue took = stopWatch.totalTime();
573-
logger.trace("recovery [phase1]: took [{}]", took);
574-
return new SendFileResult(
575-
Collections.emptyList(),
576-
Collections.emptyList(),
577-
0L,
578-
Collections.emptyList(),
579-
Collections.emptyList(),
580-
0L,
581-
took
582-
);
583-
})
584-
// and finally respond
585-
.addListener(listener);
586-
}
556+
)
557+
// perform the file recovery
558+
.<SendFileResult>andThen((l, plan) -> recoverFilesFromSourceAndSnapshot(plan, store, stopWatch, l))
559+
// and respond
560+
.addListener(listener);
587561
} catch (Exception e) {
588562
throw new RecoverFilesRecoveryException(request.shardId(), 0, ByteSizeValue.ZERO, e);
589563
}
@@ -1030,43 +1004,6 @@ private ActionListener<ReplicationResponse> wrapLeaseSyncListener(ActionListener
10301004
return new ThreadedActionListener<>(shard.getThreadPool().generic(), listener).map(ignored -> null);
10311005
}
10321006

1033-
boolean hasSameLegacySyncId(Store.MetadataSnapshot source, Store.MetadataSnapshot target) {
1034-
if (source.getSyncId() == null || source.getSyncId().equals(target.getSyncId()) == false) {
1035-
return false;
1036-
}
1037-
if (source.numDocs() != target.numDocs()) {
1038-
throw new IllegalStateException(
1039-
"try to recover "
1040-
+ request.shardId()
1041-
+ " from primary shard with sync id but number "
1042-
+ "of docs differ: "
1043-
+ source.numDocs()
1044-
+ " ("
1045-
+ request.sourceNode().getName()
1046-
+ ", primary) vs "
1047-
+ target.numDocs()
1048-
+ "("
1049-
+ request.targetNode().getName()
1050-
+ ")"
1051-
);
1052-
}
1053-
SequenceNumbers.CommitInfo sourceSeqNos = SequenceNumbers.loadSeqNoInfoFromLuceneCommit(source.commitUserData().entrySet());
1054-
SequenceNumbers.CommitInfo targetSeqNos = SequenceNumbers.loadSeqNoInfoFromLuceneCommit(target.commitUserData().entrySet());
1055-
if (sourceSeqNos.localCheckpoint() != targetSeqNos.localCheckpoint() || targetSeqNos.maxSeqNo() != sourceSeqNos.maxSeqNo()) {
1056-
final String message = "try to recover "
1057-
+ request.shardId()
1058-
+ " with sync id but "
1059-
+ "seq_no stats are mismatched: ["
1060-
+ source.commitUserData()
1061-
+ "] vs ["
1062-
+ target.commitUserData()
1063-
+ "]";
1064-
assert false : message;
1065-
throw new IllegalStateException(message);
1066-
}
1067-
return true;
1068-
}
1069-
10701007
void prepareTargetForTranslog(int totalTranslogOps, ActionListener<TimeValue> listener) {
10711008
StopWatch stopWatch = new StopWatch().start();
10721009
final ActionListener<Void> wrappedListener = ActionListener.wrap(nullVal -> {

server/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetadata.java

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -259,22 +259,9 @@ public long getPeerRecoveryRetentionLeaseRetainingSeqNo(DiscoveryNode node) {
259259
.orElse(-1L);
260260
}
261261

262-
/**
263-
* @return commit sync id if exists, else null
264-
*/
265-
public String syncId() {
266-
return metadataSnapshot.getSyncId();
267-
}
268-
269262
@Override
270263
public String toString() {
271-
return "StoreFilesMetadata{"
272-
+ ", metadataSnapshot{size="
273-
+ metadataSnapshot.size()
274-
+ ", syncId="
275-
+ metadataSnapshot.getSyncId()
276-
+ "}"
277-
+ '}';
264+
return "StoreFilesMetadata{" + ", metadataSnapshot{size=" + metadataSnapshot.size() + "}" + '}';
278265
}
279266
}
280267

0 commit comments

Comments
 (0)