Skip to content

Commit 904099f

Browse files
committed
Improve get-snapshots behaviour for unreadable repositories
Today, if the get-snapshots API cannot access one of the repositories, we return an exception with a fairly low-level message about the problem, such as `Could not determine repository generation from root blobs`. This message is shown verbatim in the Kibana UI, so users need something a little more descriptive. With this commit we wrap the exception in one that describes the problem in terms that users are more likely to understand.

Moreover, if the user specifies the `?ignore_unavailable` option then we ignore individual snapshots that are unavailable, treating them as if they do not exist, but before this commit an unavailable repository would still cause the API to return an exception. With this commit we extend the meaning of this option to also ignore whole-repository unavailability, treating unavailable repositories as if they are empty.

Relates #128208
1 parent 79d3aa8 commit 904099f

File tree

2 files changed

+77
-1
lines changed

2 files changed

+77
-1
lines changed

server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.snapshots;
1111

12+
import org.apache.logging.log4j.Level;
1213
import org.apache.lucene.util.BytesRef;
1314
import org.elasticsearch.action.ActionFuture;
1415
import org.elasticsearch.action.ActionListener;
@@ -40,10 +41,13 @@
4041
import org.elasticsearch.core.Predicates;
4142
import org.elasticsearch.repositories.RepositoriesService;
4243
import org.elasticsearch.repositories.RepositoryData;
44+
import org.elasticsearch.repositories.RepositoryException;
4345
import org.elasticsearch.repositories.RepositoryMissingException;
46+
import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
4447
import org.elasticsearch.repositories.fs.FsRepository;
4548
import org.elasticsearch.search.sort.SortOrder;
4649
import org.elasticsearch.test.ESTestCase;
50+
import org.elasticsearch.test.MockLog;
4751
import org.elasticsearch.test.XContentTestUtils;
4852
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
4953
import org.elasticsearch.threadpool.ThreadPool;
@@ -633,6 +637,68 @@ public void testRetrievingSnapshotsWhenRepositoryIsMissing() throws Exception {
633637
expectThrows(RepositoryMissingException.class, multiRepoFuture::actionGet);
634638
}
635639

640+
/**
 * Checks how the get-snapshots API behaves when the {@code index-*} files in a repository's root
 * directory have been overwritten with invalid content.
 * <p>
 * With {@code ignoreUnavailable=false} the request is expected to fail with a {@link RepositoryException}
 * whose message is {@code "[repo] cannot retrieve snapshots list from this repository"} and whose cause
 * reports {@code "[repo] Unexpected exception when loading repository data"}. With
 * {@code ignoreUnavailable=true} the request is expected to succeed with an empty snapshot list while a
 * WARN-level message is logged by {@code TransportGetSnapshotsAction}.
 */
public void testRetrievingSnapshotsWhenRepositoryIsUnreadable() throws Exception {
641+
final String repoName = randomIdentifier();
642+
final Path repoPath = randomRepoPath();
643+
// Register an "fs" repository with CACHE_REPOSITORY_DATA disabled
// (presumably so each request re-reads the root blobs instead of using cached data - TODO confirm).
createRepository(
644+
repoName,
645+
"fs",
646+
Settings.builder().put("location", repoPath).put(BlobStoreRepository.CACHE_REPOSITORY_DATA.getKey(), false)
647+
);
648+
createNSnapshots(repoName, randomIntBetween(1, 3));
649+
650+
try {
651+
// Overwrite every regular file named index-* in the repository root with the string "invalid".
try (var directoryStream = Files.newDirectoryStream(repoPath)) {
652+
for (final var directoryEntry : directoryStream) {
653+
if (Files.isRegularFile(directoryEntry) && directoryEntry.getFileName().toString().startsWith("index-")) {
654+
Files.writeString(directoryEntry, "invalid");
655+
}
656+
}
657+
}
658+
659+
// Case 1: ignoreUnavailable=false, so the request must fail with a RepositoryException.
final var repositoryException = safeAwaitAndUnwrapFailure(
660+
RepositoryException.class,
661+
GetSnapshotsResponse.class,
662+
l -> clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName)
663+
.setSort(SnapshotSortKey.NAME)
664+
.setIgnoreUnavailable(false)
665+
.execute(l)
666+
);
667+
// The outer exception carries the user-facing message ...
assertEquals(
668+
Strings.format("[%s] cannot retrieve snapshots list from this repository", repoName),
669+
repositoryException.getMessage()
670+
);
671+
// ... and its cause carries the lower-level failure message.
assertEquals(
672+
Strings.format("[%s] Unexpected exception when loading repository data", repoName),
673+
repositoryException.getCause().getMessage()
674+
);
675+
676+
// Case 2: ignoreUnavailable=true, so the request must succeed with no snapshots,
// and a WARN message naming the repository must be seen on the TransportGetSnapshotsAction logger.
MockLog.assertThatLogger(
677+
() -> safeAwait(
678+
l -> clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName)
679+
.setSort(SnapshotSortKey.NAME)
680+
.setIgnoreUnavailable(true)
681+
.execute(l.map(response -> {
682+
assertThat(response.getSnapshots(), empty());
683+
return null;
684+
}))
685+
),
686+
TransportGetSnapshotsAction.class,
687+
new MockLog.SeenEventExpectation(
688+
"invalid repository warning",
689+
TransportGetSnapshotsAction.class.getCanonicalName(),
690+
Level.WARN,
691+
Strings.format("failed to fetch repository data for [%s]", repoName)
692+
)
693+
);
694+
695+
} finally {
696+
// Always delete the repository registration, even if an assertion above failed
// (presumably so the corrupted repository does not trip later test-framework checks - TODO confirm).
safeAwait(
697+
l -> clusterAdmin().prepareDeleteRepository(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT, repoName).execute(l.map(v -> null))
698+
);
699+
}
700+
}
701+
636702
// Create a snapshot that is guaranteed to have a unique start time and duration for tests around ordering by either.
637703
// Don't use this with more than 3 snapshots on platforms with low-resolution clocks as the durations could always collide there
638704
// causing an infinite loop

server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.elasticsearch.repositories.RepositoriesService;
3939
import org.elasticsearch.repositories.Repository;
4040
import org.elasticsearch.repositories.RepositoryData;
41+
import org.elasticsearch.repositories.RepositoryException;
4142
import org.elasticsearch.repositories.RepositoryMissingException;
4243
import org.elasticsearch.repositories.ResolvedRepositories;
4344
import org.elasticsearch.search.sort.SortOrder;
@@ -285,7 +286,16 @@ private void populateResults(ActionListener<Void> listener) {
285286
),
286287
repositoryName -> asyncRepositoryContentsListener -> SubscribableListener
287288

288-
.<RepositoryData>newForked(l -> maybeGetRepositoryData(repositoryName, l))
289+
.<RepositoryData>newForked(l -> maybeGetRepositoryData(repositoryName, l.delegateResponse((ll, e) -> {
290+
if (ignoreUnavailable) {
291+
logger.warn(Strings.format("failed to fetch repository data for [%s]", repositoryName), e);
292+
ll.onResponse(RepositoryData.EMPTY);
293+
} else {
294+
ll.onFailure(
295+
new RepositoryException(repositoryName, "cannot retrieve snapshots list from this repository", e)
296+
);
297+
}
298+
})))
289299
.andThenApply(repositoryData -> {
290300
assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT);
291301
cancellableTask.ensureNotCancelled();

0 commit comments

Comments
 (0)