Skip to content

Commit 3d712e3

Browse files
authored
Handle snapshot restore in file settings (#89321) (#89398)
1 parent b172db5 commit 3d712e3

File tree

14 files changed

+505
-76
lines changed

14 files changed

+505
-76
lines changed

docs/changelog/89321.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 89321
2+
summary: Handle snapshot restore in file settings
3+
area: Infra/Core
4+
type: bug
5+
issues: [89183]

server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
4646
public class FileSettingsServiceIT extends ESIntegTestCase {
4747

48-
private AtomicLong versionCounter = new AtomicLong(1);
48+
private static AtomicLong versionCounter = new AtomicLong(1);
4949

5050
private static String testJSON = """
5151
{
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.reservedstate.service;
10+
11+
import org.elasticsearch.action.admin.cluster.settings.ClusterGetSettingsAction;
12+
import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
13+
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
14+
import org.elasticsearch.cluster.ClusterChangedEvent;
15+
import org.elasticsearch.cluster.ClusterStateListener;
16+
import org.elasticsearch.cluster.InternalClusterInfoService;
17+
import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
18+
import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
19+
import org.elasticsearch.cluster.service.ClusterService;
20+
import org.elasticsearch.common.settings.Settings;
21+
import org.elasticsearch.core.Strings;
22+
import org.elasticsearch.core.TimeValue;
23+
import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
24+
import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase;
25+
import org.elasticsearch.snapshots.SnapshotState;
26+
import org.junit.After;
27+
28+
import java.nio.charset.StandardCharsets;
29+
import java.nio.file.Files;
30+
import java.nio.file.Path;
31+
import java.nio.file.StandardCopyOption;
32+
import java.util.concurrent.CountDownLatch;
33+
import java.util.concurrent.TimeUnit;
34+
import java.util.concurrent.atomic.AtomicLong;
35+
36+
import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING;
37+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
38+
import static org.hamcrest.Matchers.equalTo;
39+
40+
/**
41+
* Tests that snapshot restore behaves correctly when we have file based settings that reserve part of the
42+
* cluster state
43+
*/
44+
public class SnaphotsAndFileSettingsIT extends AbstractSnapshotIntegTestCase {
45+
// Counter that supplies the version formatted into each settings file; writeJSONFile increments it before every write.
private static AtomicLong versionCounter = new AtomicLong(1);
46+
47+
// Settings file template that sets indices.recovery.max_bytes_per_sec to 50mb; "%s" is replaced with the version.
private static String testFileSettingsJSON = """
48+
{
49+
"metadata": {
50+
"version": "%s",
51+
"compatibility": "8.4.0"
52+
},
53+
"state": {
54+
"cluster_settings": {
55+
"indices.recovery.max_bytes_per_sec": "50mb"
56+
}
57+
}
58+
}""";
59+
60+
// Settings file template with an empty cluster_settings section; "%s" is replaced with the version.
private static String emptyFileSettingsJSON = """
61+
{
62+
"metadata": {
63+
"version": "%s",
64+
"compatibility": "8.4.0"
65+
},
66+
"state": {
67+
"cluster_settings": {}
68+
}
69+
}""";
70+
71+
@After
72+
public void cleanUp() throws Exception {
73+
awaitNoMoreRunningOperations();
74+
}
75+
76+
private void writeJSONFile(String node, String json) throws Exception {
77+
long version = versionCounter.incrementAndGet();
78+
79+
FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
80+
81+
Files.createDirectories(fileSettingsService.operatorSettingsDir());
82+
Path tempFilePath = createTempFile();
83+
84+
Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
85+
Files.move(tempFilePath, fileSettingsService.operatorSettingsFile(), StandardCopyOption.ATOMIC_MOVE);
86+
}
87+
88+
private CountDownLatch setupClusterStateListener(String node) {
89+
ClusterService clusterService = internalCluster().clusterService(node);
90+
CountDownLatch savedClusterState = new CountDownLatch(1);
91+
clusterService.addListener(new ClusterStateListener() {
92+
@Override
93+
public void clusterChanged(ClusterChangedEvent event) {
94+
ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
95+
if (reservedState != null) {
96+
ReservedStateHandlerMetadata handlerMetadata = reservedState.handlers().get(ReservedClusterSettingsAction.NAME);
97+
if (handlerMetadata == null) {
98+
fail("Should've found cluster settings in this metadata");
99+
}
100+
if (handlerMetadata.keys().contains("indices.recovery.max_bytes_per_sec")) {
101+
clusterService.removeListener(this);
102+
savedClusterState.countDown();
103+
}
104+
}
105+
}
106+
});
107+
108+
return savedClusterState;
109+
}
110+
111+
private ClusterStateResponse assertClusterStateSaveOK(CountDownLatch savedClusterState) throws Exception {
112+
boolean awaitSuccessful = savedClusterState.await(20, TimeUnit.SECONDS);
113+
assertTrue(awaitSuccessful);
114+
115+
return clusterAdmin().state(new ClusterStateRequest()).actionGet();
116+
}
117+
118+
public void testRestoreWithRemovedFileSettings() throws Exception {
119+
try {
120+
createRepository("test-repo", "fs");
121+
122+
logger.info("--> set some persistent cluster settings");
123+
assertAcked(
124+
clusterAdmin().prepareUpdateSettings()
125+
.setPersistentSettings(
126+
Settings.builder()
127+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), TimeValue.timeValueSeconds(25))
128+
.build()
129+
)
130+
);
131+
132+
ensureGreen();
133+
134+
String masterNode = internalCluster().getMasterName();
135+
136+
var savedClusterState = setupClusterStateListener(masterNode);
137+
FileSettingsService fs = internalCluster().getInstance(FileSettingsService.class, masterNode);
138+
139+
logger.info("--> write some file based settings, putting some reserved state");
140+
writeJSONFile(masterNode, testFileSettingsJSON);
141+
final ClusterStateResponse savedStateResponse = assertClusterStateSaveOK(savedClusterState);
142+
assertThat(
143+
savedStateResponse.getState().metadata().persistentSettings().get(INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING.getKey()),
144+
equalTo("50mb")
145+
);
146+
147+
logger.info("--> create full snapshot");
148+
createFullSnapshot("test-repo", "test-snap");
149+
assertThat(getSnapshot("test-repo", "test-snap").state(), equalTo(SnapshotState.SUCCESS));
150+
151+
assertAcked(
152+
clusterAdmin().prepareUpdateSettings()
153+
.setPersistentSettings(
154+
Settings.builder()
155+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), TimeValue.timeValueSeconds(55))
156+
.build()
157+
)
158+
);
159+
160+
logger.info("--> deleting operator file, no file based settings");
161+
Files.delete(fs.operatorSettingsFile());
162+
163+
logger.info("--> restore global state from the snapshot");
164+
clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap").setRestoreGlobalState(true).setWaitForCompletion(true).get();
165+
166+
ensureGreen();
167+
168+
final ClusterStateResponse clusterStateResponse = clusterAdmin().state(new ClusterStateRequest().metadata(true)).actionGet();
169+
170+
// We expect no reserved metadata state for file based settings, the operator file was deleted.
171+
assertNull(clusterStateResponse.getState().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE));
172+
173+
final ClusterGetSettingsAction.Response getSettingsResponse = clusterAdmin().execute(
174+
ClusterGetSettingsAction.INSTANCE,
175+
new ClusterGetSettingsAction.Request()
176+
).actionGet();
177+
178+
assertThat(
179+
getSettingsResponse.persistentSettings().get(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey()),
180+
equalTo("25s")
181+
);
182+
// We didn't remove the setting set by file settings, we simply removed the reserved (operator) section.
183+
assertThat(getSettingsResponse.persistentSettings().get("indices.recovery.max_bytes_per_sec"), equalTo("50mb"));
184+
} finally {
185+
// cleanup
186+
assertAcked(
187+
clusterAdmin().prepareUpdateSettings()
188+
.setPersistentSettings(
189+
Settings.builder()
190+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), (String) null)
191+
.put("indices.recovery.max_bytes_per_sec", (String) null)
192+
.build()
193+
)
194+
);
195+
}
196+
}
197+
198+
private CountDownLatch removedReservedClusterStateListener(String node) {
199+
ClusterService clusterService = internalCluster().clusterService(node);
200+
CountDownLatch savedClusterState = new CountDownLatch(1);
201+
clusterService.addListener(new ClusterStateListener() {
202+
@Override
203+
public void clusterChanged(ClusterChangedEvent event) {
204+
ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
205+
if (reservedState != null && reservedState.version() == 0L) {
206+
clusterService.removeListener(this);
207+
savedClusterState.countDown();
208+
}
209+
}
210+
});
211+
212+
return savedClusterState;
213+
}
214+
215+
private CountDownLatch cleanedClusterStateListener(String node) {
216+
ClusterService clusterService = internalCluster().clusterService(node);
217+
CountDownLatch savedClusterState = new CountDownLatch(1);
218+
clusterService.addListener(new ClusterStateListener() {
219+
@Override
220+
public void clusterChanged(ClusterChangedEvent event) {
221+
ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
222+
if (reservedState != null) {
223+
ReservedStateHandlerMetadata handlerMetadata = reservedState.handlers().get(ReservedClusterSettingsAction.NAME);
224+
if (handlerMetadata == null) {
225+
fail("Should've found cluster settings in this metadata");
226+
}
227+
if (handlerMetadata.keys().isEmpty()) {
228+
clusterService.removeListener(this);
229+
savedClusterState.countDown();
230+
}
231+
}
232+
}
233+
});
234+
235+
return savedClusterState;
236+
}
237+
238+
public void testRestoreWithPersistedFileSettings() throws Exception {
239+
try {
240+
createRepository("test-repo", "fs");
241+
242+
logger.info("--> set some persistent cluster settings");
243+
assertAcked(
244+
clusterAdmin().prepareUpdateSettings()
245+
.setPersistentSettings(
246+
Settings.builder()
247+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), TimeValue.timeValueSeconds(25))
248+
.build()
249+
)
250+
);
251+
252+
ensureGreen();
253+
254+
String masterNode = internalCluster().getMasterName();
255+
256+
var savedClusterState = setupClusterStateListener(masterNode);
257+
FileSettingsService fs = internalCluster().getInstance(FileSettingsService.class, masterNode);
258+
259+
logger.info("--> write some file based settings, putting some reserved state");
260+
writeJSONFile(masterNode, testFileSettingsJSON);
261+
final ClusterStateResponse savedStateResponse = assertClusterStateSaveOK(savedClusterState);
262+
assertThat(
263+
savedStateResponse.getState().metadata().persistentSettings().get(INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING.getKey()),
264+
equalTo("50mb")
265+
);
266+
267+
logger.info("--> create full snapshot");
268+
createFullSnapshot("test-repo", "test-snap");
269+
assertThat(getSnapshot("test-repo", "test-snap").state(), equalTo(SnapshotState.SUCCESS));
270+
271+
assertAcked(
272+
clusterAdmin().prepareUpdateSettings()
273+
.setPersistentSettings(
274+
Settings.builder()
275+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), TimeValue.timeValueSeconds(55))
276+
.build()
277+
)
278+
);
279+
280+
logger.info("--> restore global state from the snapshot");
281+
var removedReservedState = removedReservedClusterStateListener(masterNode);
282+
var restoredReservedState = setupClusterStateListener(masterNode);
283+
284+
clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap").setRestoreGlobalState(true).setWaitForCompletion(true).get();
285+
286+
ensureGreen();
287+
288+
// When the target cluster of a restore has an existing operator file, we don't un-reserve the reserved
289+
// cluster state for file based settings, but instead we reset the version to 0 and 'touch' the operator file
290+
// so that it gets re-processed.
291+
logger.info("--> reserved state version will be reset to 0, because of snapshot restore");
292+
assertTrue(removedReservedState.await(20, TimeUnit.SECONDS));
293+
294+
logger.info("--> reserved state would be restored");
295+
assertTrue(restoredReservedState.await(20, TimeUnit.SECONDS));
296+
297+
final ClusterStateResponse clusterStateResponse = clusterAdmin().state(new ClusterStateRequest().metadata(true)).actionGet();
298+
299+
assertNotNull(clusterStateResponse.getState().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE));
300+
301+
final ClusterGetSettingsAction.Response getSettingsResponse = clusterAdmin().execute(
302+
ClusterGetSettingsAction.INSTANCE,
303+
new ClusterGetSettingsAction.Request()
304+
).actionGet();
305+
306+
assertThat(
307+
getSettingsResponse.persistentSettings().get(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey()),
308+
equalTo("25s")
309+
);
310+
311+
// we need to remove the reserved state, so that clean-up can happen
312+
var cleanupReservedState = cleanedClusterStateListener(masterNode);
313+
314+
logger.info("--> clear the file based settings");
315+
writeJSONFile(masterNode, emptyFileSettingsJSON);
316+
assertClusterStateSaveOK(cleanupReservedState);
317+
} finally {
318+
// cleanup
319+
assertAcked(
320+
clusterAdmin().prepareUpdateSettings()
321+
.setPersistentSettings(
322+
Settings.builder()
323+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), (String) null)
324+
.put("indices.recovery.max_bytes_per_sec", (String) null)
325+
.build()
326+
)
327+
);
328+
}
329+
}
330+
331+
}

server/src/main/java/org/elasticsearch/cluster/metadata/Metadata.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1778,14 +1778,24 @@ public Builder put(Map<String, ReservedStateMetadata> reservedStateMetadata) {
17781778

17791779
/**
17801780
* Adds a {@link ReservedStateMetadata} for a given namespace to the metadata builder
1781-
* @param metadata an {@link ReservedStateMetadata}
1781+
* @param metadata a {@link ReservedStateMetadata}
17821782
* @return {@link Builder}
17831783
*/
17841784
public Builder put(ReservedStateMetadata metadata) {
17851785
reservedStateMetadata.put(metadata.namespace(), metadata);
17861786
return this;
17871787
}
17881788

1789+
/**
1790+
* Removes a {@link ReservedStateMetadata} for a given namespace
1791+
* @param metadata a {@link ReservedStateMetadata}
1792+
* @return {@link Builder}
1793+
*/
1794+
public Builder removeReservedState(ReservedStateMetadata metadata) {
1795+
reservedStateMetadata.remove(metadata.namespace());
1796+
return this;
1797+
}
1798+
17891799
public Builder indexGraveyard(final IndexGraveyard indexGraveyard) {
17901800
putCustom(IndexGraveyard.TYPE, indexGraveyard);
17911801
return this;

server/src/main/java/org/elasticsearch/cluster/metadata/ReservedStateMetadata.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,18 @@ public Builder(String namespace) {
214214
this.errorMetadata = null;
215215
}
216216

217+
/**
 * Creates a reserved state metadata builder pre-populated from existing metadata.
 * <p>
 * The namespace, version and error metadata are taken over as they are; the handlers are copied into
 * a new {@link HashMap}, so the builder works on its own copy of the map.
 *
 * @param metadata the previous metadata
 */
public Builder(ReservedStateMetadata metadata) {
    this(metadata.namespace);
    this.errorMetadata = metadata.errorMetadata;
    this.handlers = new HashMap<>(metadata.handlers);
    this.version = metadata.version;
}
228+
217229
/**
218230
* Creates a reserved state metadata builder
219231
*

0 commit comments

Comments
 (0)