|
11 | 11 |
|
12 | 12 | import org.elasticsearch.action.ActionListener; |
13 | 13 | import org.elasticsearch.action.support.SubscribableListener; |
14 | | -import org.elasticsearch.cluster.SnapshotsInProgress; |
15 | 14 | import org.elasticsearch.cluster.service.ClusterService; |
16 | | -import org.elasticsearch.common.Strings; |
17 | | -import org.elasticsearch.index.IndexReshardService; |
18 | 15 | import org.elasticsearch.index.IndexVersion; |
19 | 16 | import org.elasticsearch.index.shard.IndexShard; |
20 | | -import org.elasticsearch.index.shard.IndexShardState; |
21 | 17 | import org.elasticsearch.index.shard.ShardId; |
22 | | -import org.elasticsearch.index.snapshots.IndexShardSnapshotFailedException; |
23 | 18 | import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; |
24 | 19 | import org.elasticsearch.indices.IndicesService; |
25 | 20 | import org.elasticsearch.logging.LogManager; |
26 | 21 | import org.elasticsearch.logging.Logger; |
27 | 22 | import org.elasticsearch.snapshots.Snapshot; |
28 | | -import org.elasticsearch.threadpool.ThreadPool; |
29 | 23 |
|
30 | 24 | import java.io.IOException; |
31 | 25 |
|
| 26 | +import static org.elasticsearch.repositories.SnapshotShardContextHelper.acquireSnapshotIndexCommit; |
| 27 | +import static org.elasticsearch.repositories.SnapshotShardContextHelper.closeSnapshotIndexCommit; |
32 | 28 | import static org.elasticsearch.snapshots.SnapshotShardsService.getShardStateId; |
33 | 29 |
|
34 | 30 | /** |
@@ -57,157 +53,34 @@ public SubscribableListener<SnapshotShardContext> asyncCreate( |
57 | 53 | long snapshotStartTime, |
58 | 54 | ActionListener<ShardSnapshotResult> listener |
59 | 55 | ) throws IOException { |
60 | | - |
61 | 56 | final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShard(shardId.id()); |
62 | | - if (indexShard.routingEntry().primary() == false) { |
63 | | - throw new IndexShardSnapshotFailedException(shardId, "snapshot should be performed only on primary"); |
64 | | - } |
65 | | - if (indexShard.routingEntry().relocating()) { |
66 | | - // do not snapshot when in the process of relocation of primaries so we won't get conflicts |
67 | | - throw new IndexShardSnapshotFailedException(shardId, "cannot snapshot while relocating"); |
68 | | - } |
69 | | - |
70 | | - final IndexShardState indexShardState = indexShard.state(); |
71 | | - if (indexShardState == IndexShardState.CREATED || indexShardState == IndexShardState.RECOVERING) { |
72 | | - // shard has just been created, or still recovering |
73 | | - throw new IndexShardSnapshotFailedException(shardId, "shard didn't fully recover yet"); |
74 | | - } |
75 | | - |
76 | | - SnapshotIndexCommit snapshotIndexCommit = null; |
| 57 | + final var snapshotIndexCommit = acquireSnapshotIndexCommit( |
| 58 | + clusterService, |
| 59 | + indexShard, |
| 60 | + snapshot, |
| 61 | + supportsRelocationDuringSnapshot(), |
| 62 | + snapshotStatus |
| 63 | + ); |
77 | 64 | try { |
78 | | - snapshotStatus.updateStatusDescription("acquiring commit reference from IndexShard: triggers a shard flush"); |
79 | | - snapshotIndexCommit = new SnapshotIndexCommit(indexShard.acquireIndexCommitForSnapshot()); |
80 | | - |
81 | | - // The check below is needed to handle shard snapshots during resharding. |
82 | | - // Resharding changes the number of shards in the index and moves data between shards. |
83 | | - // These processes may cause shard snapshots to be inconsistent with each other (e.g. caught in between data movements) |
84 | | - // or to be out of sync with index metadata (e.g. a newly added shard is not present in the snapshot). |
85 | | - // We want to detect if a resharding operation has happened after this snapshot was started |
86 | | - // and if so we'll fail the shard snapshot to avoid such inconsistency. |
87 | | - // We perform this check here on the data node and not on the master node |
88 | | - // to correctly propagate this failure to SnapshotsService using existing listener |
89 | | - // in case resharding starts in the middle of the snapshot. |
90 | | - // Marking shard as failed directly in the cluster state would bypass parts of SnapshotsService logic. |
91 | | - |
92 | | - // We obtain a new `SnapshotsInProgress.Entry` here in order to not capture the original in the Runnable. |
93 | | - // The information that we are interested in (the shards map keys) doesn't change so this is fine. |
94 | | - SnapshotsInProgress.Entry snapshotEntry = SnapshotsInProgress.get(clusterService.state()).snapshot(snapshot); |
95 | | - // The snapshot is deleted, there is no reason to proceed. |
96 | | - if (snapshotEntry == null) { |
97 | | - throw new IndexShardSnapshotFailedException(shardId, "snapshot is deleted"); |
98 | | - } |
99 | | - |
100 | | - int maximumShardIdForIndexInTheSnapshot = calculateMaximumShardIdForIndexInTheSnapshot(shardId, snapshotEntry); |
101 | | - if (IndexReshardService.isShardSnapshotImpactedByResharding( |
102 | | - indexShard.indexSettings().getIndexMetadata(), |
103 | | - maximumShardIdForIndexInTheSnapshot |
104 | | - )) { |
105 | | - throw new IndexShardSnapshotFailedException(shardId, "cannot snapshot a shard during resharding"); |
106 | | - } |
107 | | - |
108 | | - snapshotStatus.updateStatusDescription("commit reference acquired, proceeding with snapshot"); |
109 | 65 | final var shardStateId = getShardStateId(indexShard, snapshotIndexCommit.indexCommit()); // not aborted so indexCommit() ok |
110 | | - snapshotStatus.addAbortListener(makeAbortListener(indexShard.shardId(), snapshot, snapshotIndexCommit)); |
111 | | - snapshotStatus.ensureNotAborted(); |
112 | | - |
113 | | - final var snapshotShardContextListener = doAsyncCreate( |
114 | | - shardId, |
115 | | - snapshot, |
116 | | - indexId, |
117 | | - snapshotStatus, |
118 | | - repositoryMetaVersion, |
119 | | - snapshotStartTime, |
120 | | - listener, |
121 | | - indexShard, |
122 | | - snapshotIndexCommit, |
123 | | - shardStateId |
| 66 | + return SubscribableListener.newSucceeded( |
| 67 | + new LocalPrimarySnapshotShardContext( |
| 68 | + indexShard.store(), |
| 69 | + indexShard.mapperService(), |
| 70 | + snapshot.getSnapshotId(), |
| 71 | + indexId, |
| 72 | + snapshotIndexCommit, |
| 73 | + shardStateId, |
| 74 | + snapshotStatus, |
| 75 | + repositoryMetaVersion, |
| 76 | + snapshotStartTime, |
| 77 | + listener |
| 78 | + ) |
124 | 79 | ); |
125 | | - snapshotIndexCommit = null; |
126 | | - return snapshotShardContextListener; |
127 | | - } finally { |
128 | | - if (snapshotIndexCommit != null) { |
129 | | - snapshotIndexCommit.closingBefore(new ActionListener<Void>() { |
130 | | - @Override |
131 | | - public void onResponse(Void unused) {} |
132 | | - |
133 | | - @Override |
134 | | - public void onFailure(Exception e) { |
135 | | - // we're already failing exceptionally, and prefer to propagate the original exception instead of this one |
136 | | - logger.warn(Strings.format("exception closing commit for [%s] in [%s]", shardId, snapshot), e); |
137 | | - } |
138 | | - }).onResponse(null); |
139 | | - } |
| 80 | + } catch (Exception e) { |
| 81 | + closeSnapshotIndexCommit(snapshotIndexCommit, shardId, snapshot); |
| 82 | + throw e; |
140 | 83 | } |
141 | 84 | } |
142 | 85 |
|
143 | | - protected SubscribableListener<SnapshotShardContext> doAsyncCreate( |
144 | | - ShardId shardId, |
145 | | - Snapshot snapshot, |
146 | | - IndexId indexId, |
147 | | - IndexShardSnapshotStatus snapshotStatus, |
148 | | - IndexVersion repositoryMetaVersion, |
149 | | - long snapshotStartTime, |
150 | | - ActionListener<ShardSnapshotResult> listener, |
151 | | - IndexShard indexShard, |
152 | | - SnapshotIndexCommit snapshotIndexCommit, |
153 | | - String shardStateId |
154 | | - ) { |
155 | | - return SubscribableListener.newSucceeded( |
156 | | - new LocalPrimarySnapshotShardContext( |
157 | | - indexShard.store(), |
158 | | - indexShard.mapperService(), |
159 | | - snapshot.getSnapshotId(), |
160 | | - indexId, |
161 | | - snapshotIndexCommit, |
162 | | - shardStateId, |
163 | | - snapshotStatus, |
164 | | - repositoryMetaVersion, |
165 | | - snapshotStartTime, |
166 | | - listener |
167 | | - ) |
168 | | - ); |
169 | | - } |
170 | | - |
171 | | - private static int calculateMaximumShardIdForIndexInTheSnapshot(ShardId shardIdStartingASnapshot, SnapshotsInProgress.Entry entry) { |
172 | | - int maximum = shardIdStartingASnapshot.id(); |
173 | | - int i = maximum + 1; |
174 | | - |
175 | | - while (entry.shards().containsKey(new ShardId(shardIdStartingASnapshot.getIndex(), i))) { |
176 | | - maximum = i; |
177 | | - i += 1; |
178 | | - } |
179 | | - |
180 | | - return maximum; |
181 | | - } |
182 | | - |
183 | | - static ActionListener<IndexShardSnapshotStatus.AbortStatus> makeAbortListener( |
184 | | - ShardId shardId, |
185 | | - Snapshot snapshot, |
186 | | - SnapshotIndexCommit snapshotIndexCommit |
187 | | - ) { |
188 | | - return new ActionListener<>() { |
189 | | - @Override |
190 | | - public void onResponse(IndexShardSnapshotStatus.AbortStatus abortStatus) { |
191 | | - if (abortStatus == IndexShardSnapshotStatus.AbortStatus.ABORTED) { |
192 | | - assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.GENERIC, ThreadPool.Names.SNAPSHOT); |
193 | | - snapshotIndexCommit.onAbort(); |
194 | | - } |
195 | | - } |
196 | | - |
197 | | - @Override |
198 | | - public void onFailure(Exception e) { |
199 | | - logger.error(() -> Strings.format("unexpected failure in %s", description()), e); |
200 | | - assert false : e; |
201 | | - } |
202 | | - |
203 | | - @Override |
204 | | - public String toString() { |
205 | | - return description(); |
206 | | - } |
207 | | - |
208 | | - private String description() { |
209 | | - return Strings.format("abort listener for [%s] in [%s]", shardId, snapshot); |
210 | | - } |
211 | | - }; |
212 | | - } |
213 | 86 | } |
0 commit comments