Skip to content

Commit 808ce72

Browse files
authored
Don't announce ready until file settings are applied (elastic#92856)
Instead of failing startup on incorrect file based settings, prevent the node from declaring readiness. This is equivalent from an outside perspective of the readiness probe, however, it doesn't block on the cluster state update task.
1 parent 2b04ca6 commit 808ce72

File tree

21 files changed

+495
-436
lines changed

21 files changed

+495
-436
lines changed

docs/changelog/92856.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 92856
2+
summary: Don't announce ready until file settings are applied
3+
area: Infra/Core
4+
type: bug
5+
issues:
6+
- 92812

server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestFileSettingsIT.java

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
2121
import org.elasticsearch.cluster.service.ClusterService;
2222
import org.elasticsearch.common.bytes.BytesReference;
23-
import org.elasticsearch.common.settings.Settings;
2423
import org.elasticsearch.core.Strings;
2524
import org.elasticsearch.core.Tuple;
2625
import org.elasticsearch.plugins.Plugin;
@@ -51,11 +50,6 @@
5150

5251
public class IngestFileSettingsIT extends ESIntegTestCase {
5352

54-
@Override
55-
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
56-
return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
57-
}
58-
5953
@Override
6054
protected Collection<Class<? extends Plugin>> nodePlugins() {
6155
return Arrays.asList(CustomIngestTestPlugin.class);
@@ -107,7 +101,7 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
107101
"ingest_pipelines": {
108102
"my_ingest_pipeline": {
109103
"description": "_description",
110-
"processors": [
104+
"processors":
111105
{
112106
"foo" : {
113107
"field": "pipeline",
@@ -210,11 +204,11 @@ public void clusterChanged(ClusterChangedEvent event) {
210204
clusterService.removeListener(this);
211205
metadataVersion.set(event.state().metadata().version());
212206
savedClusterState.countDown();
213-
assertEquals(ReservedStateErrorMetadata.ErrorKind.VALIDATION, reservedState.errorMetadata().errorKind());
207+
assertEquals(ReservedStateErrorMetadata.ErrorKind.PARSING, reservedState.errorMetadata().errorKind());
214208
assertThat(reservedState.errorMetadata().errors(), allOf(notNullValue(), hasSize(1)));
215209
assertThat(
216210
reservedState.errorMetadata().errors().get(0),
217-
containsString("org.elasticsearch.ElasticsearchParseException: No processor type exists with name [foo]")
211+
containsString("org.elasticsearch.xcontent.XContentParseException: [17:16] [reserved_state_chunk] failed")
218212
);
219213
}
220214
}

server/src/internalClusterTest/java/org/elasticsearch/readiness/ReadinessClusterIT.java

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,73 @@
88
package org.elasticsearch.readiness;
99

1010
import org.elasticsearch.client.internal.Client;
11+
import org.elasticsearch.cluster.ClusterChangedEvent;
12+
import org.elasticsearch.cluster.ClusterStateListener;
13+
import org.elasticsearch.cluster.metadata.ReservedStateErrorMetadata;
14+
import org.elasticsearch.cluster.metadata.ReservedStateHandlerMetadata;
15+
import org.elasticsearch.cluster.metadata.ReservedStateMetadata;
16+
import org.elasticsearch.cluster.service.ClusterService;
1117
import org.elasticsearch.common.settings.Settings;
18+
import org.elasticsearch.core.Strings;
19+
import org.elasticsearch.core.Tuple;
1220
import org.elasticsearch.discovery.MasterNotDiscoveredException;
21+
import org.elasticsearch.reservedstate.action.ReservedClusterSettingsAction;
22+
import org.elasticsearch.reservedstate.service.FileSettingsService;
1323
import org.elasticsearch.test.ESIntegTestCase;
1424
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
1525
import org.elasticsearch.test.InternalTestCluster;
1626
import org.elasticsearch.test.readiness.ReadinessClientProbe;
1727

28+
import java.nio.charset.StandardCharsets;
29+
import java.nio.file.Files;
30+
import java.nio.file.Path;
31+
import java.nio.file.StandardCopyOption;
1832
import java.util.List;
33+
import java.util.concurrent.CountDownLatch;
1934
import java.util.concurrent.TimeUnit;
35+
import java.util.concurrent.atomic.AtomicLong;
2036

37+
import static org.elasticsearch.node.Node.INITIAL_STATE_TIMEOUT_SETTING;
2138
import static org.elasticsearch.test.NodeRoles.dataOnlyNode;
2239
import static org.elasticsearch.test.NodeRoles.masterNode;
2340
import static org.elasticsearch.test.NodeRoles.nonDataNode;
41+
import static org.hamcrest.Matchers.allOf;
42+
import static org.hamcrest.Matchers.containsString;
2443
import static org.hamcrest.Matchers.equalTo;
44+
import static org.hamcrest.Matchers.hasSize;
45+
import static org.hamcrest.Matchers.notNullValue;
2546

2647
@ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
2748
public class ReadinessClusterIT extends ESIntegTestCase implements ReadinessClientProbe {
2849

50+
private static AtomicLong versionCounter = new AtomicLong(1);
51+
52+
private static String testErrorJSON = """
53+
{
54+
"metadata": {
55+
"version": "%s",
56+
"compatibility": "8.4.0"
57+
},
58+
"state": {
59+
"not_cluster_settings": {
60+
"search.allow_expensive_queries": "false"
61+
}
62+
}
63+
}""";
64+
65+
private static String testJSON = """
66+
{
67+
"metadata": {
68+
"version": "%s",
69+
"compatibility": "8.4.0"
70+
},
71+
"state": {
72+
"cluster_settings": {
73+
"indices.recovery.max_bytes_per_sec": "50mb"
74+
}
75+
}
76+
}""";
77+
2978
@Override
3079
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
3180
Settings.Builder settings = Settings.builder()
@@ -152,4 +201,122 @@ public Settings onNodeStopped(String nodeName) throws Exception {
152201
tcpReadinessProbeTrue(s);
153202
}
154203
}
204+
205+
private Tuple<CountDownLatch, AtomicLong> setupClusterStateListenerForError(String node) {
206+
ClusterService clusterService = internalCluster().clusterService(node);
207+
CountDownLatch savedClusterState = new CountDownLatch(1);
208+
AtomicLong metadataVersion = new AtomicLong(-1);
209+
clusterService.addListener(new ClusterStateListener() {
210+
@Override
211+
public void clusterChanged(ClusterChangedEvent event) {
212+
ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
213+
if (reservedState != null && reservedState.errorMetadata() != null) {
214+
assertEquals(ReservedStateErrorMetadata.ErrorKind.PARSING, reservedState.errorMetadata().errorKind());
215+
assertThat(reservedState.errorMetadata().errors(), allOf(notNullValue(), hasSize(1)));
216+
assertThat(
217+
reservedState.errorMetadata().errors().get(0),
218+
containsString("Missing handler definition for content key [not_cluster_settings]")
219+
);
220+
clusterService.removeListener(this);
221+
metadataVersion.set(event.state().metadata().version());
222+
savedClusterState.countDown();
223+
}
224+
}
225+
});
226+
227+
return new Tuple<>(savedClusterState, metadataVersion);
228+
}
229+
230+
private void writeJSONFile(String node, String json) throws Exception {
231+
long version = versionCounter.incrementAndGet();
232+
233+
FileSettingsService fileSettingsService = internalCluster().getInstance(FileSettingsService.class, node);
234+
235+
Files.createDirectories(fileSettingsService.operatorSettingsDir());
236+
Path tempFilePath = createTempFile();
237+
238+
Files.write(tempFilePath, Strings.format(json, version).getBytes(StandardCharsets.UTF_8));
239+
Files.move(tempFilePath, fileSettingsService.operatorSettingsFile(), StandardCopyOption.ATOMIC_MOVE);
240+
logger.info("--> New file settings: [{}]", Strings.format(json, version));
241+
}
242+
243+
public void testNotReadyOnBadFileSettings() throws Exception {
244+
internalCluster().setBootstrapMasterNodeIndex(0);
245+
logger.info("--> start data node / non master node");
246+
String dataNode = internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
247+
FileSettingsService dataFileSettingsService = internalCluster().getInstance(FileSettingsService.class, dataNode);
248+
249+
assertFalse(dataFileSettingsService.watching());
250+
251+
logger.info("--> write bad file settings before we boot master node");
252+
writeJSONFile(dataNode, testErrorJSON);
253+
254+
logger.info("--> start master node");
255+
final String masterNode = internalCluster().startMasterOnlyNode(
256+
Settings.builder().put(INITIAL_STATE_TIMEOUT_SETTING.getKey(), "0s").build()
257+
);
258+
assertMasterNode(internalCluster().nonMasterClient(), masterNode);
259+
var savedClusterState = setupClusterStateListenerForError(masterNode);
260+
261+
FileSettingsService masterFileSettingsService = internalCluster().getInstance(FileSettingsService.class, masterNode);
262+
263+
assertTrue(masterFileSettingsService.watching());
264+
assertFalse(dataFileSettingsService.watching());
265+
266+
boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
267+
assertTrue(awaitSuccessful);
268+
269+
ReadinessService s = internalCluster().getInstance(ReadinessService.class, internalCluster().getMasterName());
270+
assertNull(s.boundAddress());
271+
}
272+
273+
private Tuple<CountDownLatch, AtomicLong> setupClusterStateListener(String node) {
274+
ClusterService clusterService = internalCluster().clusterService(node);
275+
CountDownLatch savedClusterState = new CountDownLatch(1);
276+
AtomicLong metadataVersion = new AtomicLong(-1);
277+
clusterService.addListener(new ClusterStateListener() {
278+
@Override
279+
public void clusterChanged(ClusterChangedEvent event) {
280+
ReservedStateMetadata reservedState = event.state().metadata().reservedStateMetadata().get(FileSettingsService.NAMESPACE);
281+
if (reservedState != null) {
282+
ReservedStateHandlerMetadata handlerMetadata = reservedState.handlers().get(ReservedClusterSettingsAction.NAME);
283+
if (handlerMetadata != null && handlerMetadata.keys().contains("indices.recovery.max_bytes_per_sec")) {
284+
clusterService.removeListener(this);
285+
metadataVersion.set(event.state().metadata().version());
286+
savedClusterState.countDown();
287+
}
288+
}
289+
}
290+
});
291+
292+
return new Tuple<>(savedClusterState, metadataVersion);
293+
}
294+
295+
public void testReadyAfterCorrectFileSettings() throws Exception {
296+
internalCluster().setBootstrapMasterNodeIndex(0);
297+
logger.info("--> start data node / non master node");
298+
String dataNode = internalCluster().startNode(Settings.builder().put(dataOnlyNode()).put("discovery.initial_state_timeout", "1s"));
299+
FileSettingsService dataFileSettingsService = internalCluster().getInstance(FileSettingsService.class, dataNode);
300+
301+
assertFalse(dataFileSettingsService.watching());
302+
var savedClusterState = setupClusterStateListener(dataNode);
303+
304+
logger.info("--> write correct file settings before we boot master node");
305+
writeJSONFile(dataNode, testJSON);
306+
307+
logger.info("--> start master node");
308+
final String masterNode = internalCluster().startMasterOnlyNode();
309+
assertMasterNode(internalCluster().nonMasterClient(), masterNode);
310+
311+
FileSettingsService masterFileSettingsService = internalCluster().getInstance(FileSettingsService.class, masterNode);
312+
313+
assertTrue(masterFileSettingsService.watching());
314+
assertFalse(dataFileSettingsService.watching());
315+
316+
boolean awaitSuccessful = savedClusterState.v1().await(20, TimeUnit.SECONDS);
317+
assertTrue(awaitSuccessful);
318+
319+
ReadinessService s = internalCluster().getInstance(ReadinessService.class, internalCluster().getMasterName());
320+
tcpReadinessProbeTrue(s);
321+
}
155322
}

server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,6 @@
5252
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
5353
public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase {
5454

55-
@Override
56-
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
57-
return applyWorkaroundForIssue92812(super.nodeSettings(nodeOrdinal, otherSettings));
58-
}
59-
6055
private static AtomicLong versionCounter = new AtomicLong(1);
6156

6257
private static String emptyJSON = """

0 commit comments

Comments
 (0)