Skip to content

Commit 7f259d7

Browse files
committed
More info on failures
1 parent 7d23476 commit 7f259d7

File tree

1 file changed

+87
-30
lines changed

1 file changed

+87
-30
lines changed

server/src/main/java/org/elasticsearch/repositories/blobstore/MetadataVerifier.java

Lines changed: 87 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.action.ActionRunnable;
1515
import org.elasticsearch.action.admin.cluster.repositories.integrity.VerifyRepositoryIntegrityAction;
1616
import org.elasticsearch.action.support.ListenableActionFuture;
17+
import org.elasticsearch.cluster.metadata.Metadata;
1718
import org.elasticsearch.common.CheckedSupplier;
1819
import org.elasticsearch.common.blobstore.support.BlobMetadata;
1920
import org.elasticsearch.common.unit.ByteSizeUnit;
@@ -37,7 +38,6 @@
3738
import java.util.Iterator;
3839
import java.util.List;
3940
import java.util.Map;
40-
import java.util.Objects;
4141
import java.util.Queue;
4242
import java.util.Set;
4343
import java.util.concurrent.ConcurrentLinkedQueue;
@@ -100,9 +100,10 @@ public void close() {
100100
}
101101

102102
/**
 * Records a verification failure whose message is built from a format string and its arguments.
 * The failure counter is incremented on every call, but the failure is only logged (at debug level)
 * and added to {@code failures} while the counter does not exceed {@code verifyRequest.getMaxFailures()}.
 *
 * @param format format string for the failure message
 * @param args   arguments substituted into {@code format}
 */
private void addFailure(String format, Object... args) {
103-
if (failureCount.incrementAndGet() <= verifyRequest.getMaxFailures()) {
103+
final var failureNumber = failureCount.incrementAndGet();
104+
if (failureNumber <= verifyRequest.getMaxFailures()) {
104105
final var failure = format(format, args);
105-
logger.debug("[{}] found metadata verification failure: {}", repositoryName, failure);
106+
logger.debug("[{}] found metadata verification failure [{}]: {}", repositoryName, failureNumber, failure);
106107
failures.add(new RepositoryVerificationException(repositoryName, failure));
107108
}
108109
}
@@ -111,8 +112,9 @@ private void addFailure(Exception exception) {
111112
if (isCancelledSupplier.getAsBoolean() && exception instanceof TaskCancelledException) {
112113
return;
113114
}
114-
if (failureCount.incrementAndGet() <= verifyRequest.getMaxFailures()) {
115-
logger.debug(() -> format("[%s] exception during metadata verification: {}", repositoryName), exception);
115+
final var failureNumber = failureCount.incrementAndGet();
116+
if (failureNumber <= verifyRequest.getMaxFailures()) {
117+
logger.debug(() -> format("[%s] exception [%d] during metadata verification", repositoryName, failureNumber), exception);
116118
failures.add(
117119
exception instanceof RepositoryVerificationException rve
118120
? rve
@@ -168,13 +170,28 @@ private void verifySnapshot(RefCounted snapshotRefs, SnapshotId snapshotId) {
168170
}
169171
}));
170172

171-
forkSupply(snapshotRefs, () -> blobStoreRepository.getSnapshotGlobalMetadata(snapshotId), metadata -> {
172-
if (metadata.indices().isEmpty() == false) {
173+
forkSupply(snapshotRefs, () -> getSnapshotGlobalMetadata(snapshotId), metadata -> {
174+
if (metadata != null && metadata.indices().isEmpty() == false) {
173175
addFailure("snapshot [%s] contains unexpected index metadata within global metadata", snapshotId);
174176
}
175177
});
176178
}
177179

180+
/**
 * Reads the global metadata of the given snapshot from {@code blobStoreRepository}.
 * Any exception thrown by the read is wrapped in a {@link RepositoryVerificationException},
 * reported through {@code addFailure}, and not propagated to the caller.
 *
 * @param snapshotId the snapshot whose global metadata should be read
 * @return the metadata, or {@code null} if reading it failed
 */
private Metadata getSnapshotGlobalMetadata(SnapshotId snapshotId) {
181+
try {
182+
return blobStoreRepository.getSnapshotGlobalMetadata(snapshotId);
183+
} catch (Exception e) {
184+
addFailure(
185+
new RepositoryVerificationException(
186+
repositoryName,
187+
format("failed to get snapshot global metadata for [%s]", snapshotId),
188+
e
189+
)
190+
);
191+
// the failure has been recorded above; null tells the caller there is nothing further to check
return null;
192+
}
193+
}
194+
178195
private void verifyIndices() {
179196
final var indicesMap = repositoryData.getIndices();
180197

@@ -232,22 +249,14 @@ private void verifyIndexSnapshot(RefCounted indexSnapshotRefs, SnapshotId snapsh
232249
shardCountListenersByBlobId.computeIfAbsent(indexMetaBlobId, ignored -> {
233250
final var shardCountFuture = new ListenableActionFuture<Integer>();
234251
forkSupply(() -> {
235-
final var shardCount = blobStoreRepository.getSnapshotIndexMetaData(repositoryData, snapshotId, indexId)
236-
.getNumberOfShards();
252+
final var shardCount = getNumberOfShards(indexMetaBlobId, snapshotId);
237253
for (int i = 0; i < shardCount; i++) {
238254
shardContainerContentsListener.computeIfAbsent(i, shardId -> {
239255
final var shardContainerContentsFuture = new ListenableActionFuture<ShardContainerContents>();
240256
forkSupply(
241257
() -> new ShardContainerContents(
242258
blobStoreRepository.shardContainer(indexId, shardId).listBlobs(),
243-
blobStoreRepository.getBlobStoreIndexShardSnapshots(
244-
indexId,
245-
shardId,
246-
Objects.requireNonNull(
247-
repositoryData.shardGenerations().getShardGen(indexId, shardId),
248-
"shard generations for " + indexId + "/" + shardId
249-
)
250-
)
259+
getBlobStoreIndexShardSnapshots(shardId)
251260
),
252261
shardContainerContentsFuture
253262
);
@@ -266,10 +275,7 @@ private void verifyIndexSnapshot(RefCounted indexSnapshotRefs, SnapshotId snapsh
266275
indexSnapshotRefs,
267276
shardContainerContents -> forkSupply(
268277
indexSnapshotRefs,
269-
() -> blobStoreRepository.loadShardSnapshot(
270-
blobStoreRepository.shardContainer(indexId, shardId),
271-
snapshotId
272-
),
278+
() -> getBlobStoreIndexShardSnapshot(snapshotId, shardId),
273279
shardSnapshot -> verifyShardSnapshot(snapshotId, shardId, shardContainerContents, shardSnapshot)
274280
)
275281
)
@@ -278,12 +284,60 @@ private void verifyIndexSnapshot(RefCounted indexSnapshotRefs, SnapshotId snapsh
278284
}));
279285
}
280286

287+
/**
 * Loads the shard-level snapshot for the given snapshot and shard of {@code indexId} from the
 * shard's container. Any exception thrown by the load is wrapped in a
 * {@link RepositoryVerificationException}, reported through {@code addFailure}, and not propagated.
 *
 * @param snapshotId the snapshot to load
 * @param shardId    the shard number within {@code indexId}
 * @return the loaded shard snapshot, or {@code null} if loading it failed
 */
private BlobStoreIndexShardSnapshot getBlobStoreIndexShardSnapshot(SnapshotId snapshotId, int shardId) {
288+
try {
289+
return blobStoreRepository.loadShardSnapshot(blobStoreRepository.shardContainer(indexId, shardId), snapshotId);
290+
} catch (Exception e) {
291+
addFailure(
292+
new RepositoryVerificationException(
293+
repositoryName,
294+
format("failed to load shard %s/%d snapshot for %s", indexId, shardId, snapshotId),
295+
e
296+
)
297+
);
298+
// the failure has been recorded above; callers must handle a null result
return null;
299+
}
300+
}
301+
302+
/**
 * Reads the index metadata of {@code indexId} for the given snapshot and returns its shard count.
 * Any exception thrown by the read is wrapped in a {@link RepositoryVerificationException},
 * reported through {@code addFailure}, and not propagated.
 *
 * @param indexMetaBlobId identifier of the index metadata blob; used only in the failure message
 * @param snapshotId      the snapshot whose index metadata should be read
 * @return the number of shards, or {@code 0} if the index metadata could not be loaded
 */
private int getNumberOfShards(String indexMetaBlobId, SnapshotId snapshotId) {
303+
try {
304+
return blobStoreRepository.getSnapshotIndexMetaData(repositoryData, snapshotId, indexId).getNumberOfShards();
305+
} catch (Exception e) {
306+
addFailure(
307+
new RepositoryVerificationException(
308+
repositoryName,
309+
format("failed to load index %s metadata for %s from blob [%s]", indexId, snapshotId, indexMetaBlobId),
310+
e
311+
)
312+
);
313+
// the failure has been recorded above; a count of zero means a loop over the shard count does nothing
return 0;
314+
}
315+
}
316+
317+
/**
 * Loads the shard-level snapshots summary for the given shard of {@code indexId}, using the shard
 * generation recorded in {@code repositoryData}. A missing shard generation and any exception
 * thrown by the load are both reported through {@code addFailure} rather than propagated.
 *
 * @param shardId the shard number within {@code indexId}
 * @return the shard-level summary, or {@code null} if the shard generation is unknown or loading failed
 */
private BlobStoreIndexShardSnapshots getBlobStoreIndexShardSnapshots(int shardId) {
318+
final var shardGen = repositoryData.shardGenerations().getShardGen(indexId, shardId);
319+
if (shardGen == null) {
320+
// without a generation there is nothing to load, so record the problem and stop here
addFailure("unknown shard generation for %s/%d", indexId, shardId);
321+
return null;
322+
}
323+
try {
324+
return blobStoreRepository.getBlobStoreIndexShardSnapshots(indexId, shardId, shardGen);
325+
} catch (Exception e) {
326+
// unlike the sibling loaders, the exception is handed to addFailure without adding context here
addFailure(e);
327+
return null;
328+
}
329+
}
330+
281331
private void verifyShardSnapshot(
282332
SnapshotId snapshotId,
283333
int shardId,
284334
ShardContainerContents shardContainerContents,
285335
BlobStoreIndexShardSnapshot shardSnapshot
286336
) {
337+
if (shardSnapshot == null) {
338+
return;
339+
}
340+
287341
if (shardSnapshot.snapshot().equals(snapshotId.getName()) == false) {
288342
addFailure(
289343
"snapshot [%s] for shard [%s/%d] has mismatched name [%s]",
@@ -298,17 +352,20 @@ private void verifyShardSnapshot(
298352
verifyFileInfo(snapshotId.toString(), shardId, shardContainerContents.blobsByName(), fileInfo);
299353
}
300354

301-
boolean foundSnapshot = false;
302-
for (SnapshotFiles summary : shardContainerContents.blobStoreIndexShardSnapshots().snapshots()) {
303-
if (summary.snapshot().equals(snapshotId.getName())) {
304-
foundSnapshot = true;
305-
verifyConsistentShardFiles(snapshotId, shardId, shardSnapshot, summary);
306-
break;
355+
final var blobStoreIndexShardSnapshots = shardContainerContents.blobStoreIndexShardSnapshots();
356+
if (blobStoreIndexShardSnapshots != null) {
357+
boolean foundSnapshot = false;
358+
for (SnapshotFiles summary : blobStoreIndexShardSnapshots.snapshots()) {
359+
if (summary.snapshot().equals(snapshotId.getName())) {
360+
foundSnapshot = true;
361+
verifyConsistentShardFiles(snapshotId, shardId, shardSnapshot, summary);
362+
break;
363+
}
307364
}
308-
}
309365

310-
if (foundSnapshot == false) {
311-
addFailure("snapshot [%s] for shard [%s/%d] has no entry in the shard-level summary", snapshotId, indexId, shardId);
366+
if (foundSnapshot == false) {
367+
addFailure("snapshot [%s] for shard [%s/%d] has no entry in the shard-level summary", snapshotId, indexId, shardId);
368+
}
312369
}
313370
}
314371

0 commit comments

Comments
 (0)