diff --git a/docs/changelog/128277.yaml b/docs/changelog/128277.yaml new file mode 100644 index 0000000000000..6f66c4d613029 --- /dev/null +++ b/docs/changelog/128277.yaml @@ -0,0 +1,5 @@ +pr: 128277 +summary: Improve get-snapshots behaviour for unreadable repositories +area: Snapshot/Restore +type: enhancement +issues: [] diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java index c0e63520fee9e..2068997aefd7b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/GetSnapshotsIT.java @@ -9,6 +9,7 @@ package org.elasticsearch.snapshots; +import org.apache.logging.log4j.Level; import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.ActionListener; @@ -40,10 +41,13 @@ import org.elasticsearch.core.Predicates; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryMissingException; +import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.MockLog; import org.elasticsearch.test.XContentTestUtils; import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; import org.elasticsearch.threadpool.ThreadPool; @@ -633,6 +637,68 @@ public void testRetrievingSnapshotsWhenRepositoryIsMissing() throws Exception { expectThrows(RepositoryMissingException.class, multiRepoFuture::actionGet); } + public void testRetrievingSnapshotsWhenRepositoryIsUnreadable() throws Exception { + final String repoName = randomIdentifier(); + final Path repoPath = randomRepoPath(); + createRepository( + repoName, + "fs", + Settings.builder().put("location", repoPath).put(BlobStoreRepository.CACHE_REPOSITORY_DATA.getKey(), false) + ); + createNSnapshots(repoName, randomIntBetween(1, 3)); + + try { + try (var directoryStream = Files.newDirectoryStream(repoPath)) { + for (final var directoryEntry : directoryStream) { + if (Files.isRegularFile(directoryEntry) && directoryEntry.getFileName().toString().startsWith("index-")) { + Files.writeString(directoryEntry, "invalid"); + } + } + } + + final var repositoryException = safeAwaitAndUnwrapFailure( + RepositoryException.class, + GetSnapshotsResponse.class, + l -> clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName) + .setSort(SnapshotSortKey.NAME) + .setIgnoreUnavailable(false) + .execute(l) + ); + assertEquals( + Strings.format("[%s] cannot retrieve snapshots list from this repository", repoName), + repositoryException.getMessage() + ); + assertEquals( + Strings.format("[%s] Unexpected exception when loading repository data", repoName), + repositoryException.getCause().getMessage() + ); + + MockLog.assertThatLogger( + () -> safeAwait( + l -> clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName) + .setSort(SnapshotSortKey.NAME) + .setIgnoreUnavailable(true) + .execute(l.map(response -> { + assertThat(response.getSnapshots(), empty()); + return null; + })) + ), + TransportGetSnapshotsAction.class, + new MockLog.SeenEventExpectation( + "invalid repository warning", + TransportGetSnapshotsAction.class.getCanonicalName(), + Level.WARN, + Strings.format("failed to fetch repository data for [%s]", repoName) + ) + ); + + } finally { + safeAwait( + l -> clusterAdmin().prepareDeleteRepository(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT, repoName).execute(l.map(v -> null)) + ); + } + } + // Create a snapshot that is guaranteed to have a unique start time and duration for tests around ordering by either. // Don't use this with more than 3 snapshots on platforms with low-resolution clocks as the durations could always collide there // causing an infinite loop diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java index 3e0411930191a..b50cba1310c42 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/get/TransportGetSnapshotsAction.java @@ -38,6 +38,7 @@ import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryMissingException; import org.elasticsearch.repositories.ResolvedRepositories; import org.elasticsearch.search.sort.SortOrder; @@ -285,7 +286,16 @@ private void populateResults(ActionListener listener) { ), repositoryName -> asyncRepositoryContentsListener -> SubscribableListener - .newForked(l -> maybeGetRepositoryData(repositoryName, l)) + .newForked(l -> maybeGetRepositoryData(repositoryName, l.delegateResponse((ll, e) -> { + if (ignoreUnavailable) { + logger.warn(Strings.format("failed to fetch repository data for [%s]", repositoryName), e); + ll.onResponse(RepositoryData.EMPTY); + } else { + ll.onFailure( + new RepositoryException(repositoryName, "cannot retrieve snapshots list from this repository", e) + ); + } + }))) .andThenApply(repositoryData -> { assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); cancellableTask.ensureNotCancelled();