2525import org .elasticsearch .core .Tuple ;
2626import org .elasticsearch .reservedstate .action .ReservedClusterSettingsAction ;
2727import org .elasticsearch .test .ESIntegTestCase ;
28+ import org .junit .Before ;
2829
2930import java .nio .charset .StandardCharsets ;
3031import java .nio .file .Files ;
4041import static org .elasticsearch .test .NodeRoles .dataOnlyNode ;
4142import static org .elasticsearch .test .NodeRoles .masterNode ;
4243import static org .hamcrest .Matchers .allOf ;
44+ import static org .hamcrest .Matchers .containsInAnyOrder ;
4345import static org .hamcrest .Matchers .containsString ;
4446import static org .hamcrest .Matchers .equalTo ;
4547import static org .hamcrest .Matchers .hasSize ;
5052@ ESIntegTestCase .ClusterScope (scope = ESIntegTestCase .Scope .TEST , numDataNodes = 0 , autoManageMasterNodes = false )
5153public class FileSettingsServiceIT extends ESIntegTestCase {
5254
53- private static final AtomicLong versionCounter = new AtomicLong (1 );
55+ private final AtomicLong versionCounter = new AtomicLong (1 );
56+
57+ @ Before
58+ public void resetVersionCounter () {
59+ versionCounter .set (1 );
60+ }
5461
5562 private static final String testJSON = """
5663 {
@@ -102,15 +109,29 @@ public class FileSettingsServiceIT extends ESIntegTestCase {
102109 }
103110 }""" ;
104111
// Settings-file template whose "state" section is keyed by "bad_cluster_settings".
// testNewErrorOnRestartReprocessing writes this file and then expects an error mentioning that key.
// The "%s" under metadata.version is a placeholder for the file version
// (presumably filled in by writeJSONFile; the formatting call is not visible in this excerpt).
112+ private static final String testOtherErrorJSON = """
113+ {
114+ "metadata": {
115+ "version": "%s",
116+ "compatibility": "8.4.0"
117+ },
118+ "state": {
119+ "bad_cluster_settings": {
120+ "search.allow_expensive_queries": "false"
121+ }
122+ }
123+ }""" ;
124+
105125 private void assertMasterNode (Client client , String node ) {
106126 assertThat (
107127 client .admin ().cluster ().prepareState (TEST_REQUEST_TIMEOUT ).get ().getState ().nodes ().getMasterNode ().getName (),
108128 equalTo (node )
109129 );
110130 }
111131
112- public static void writeJSONFile (String node , String json , AtomicLong versionCounter , Logger logger ) throws Exception {
113- long version = versionCounter .incrementAndGet ();
132+ public static void writeJSONFile (String node , String json , AtomicLong versionCounter , Logger logger , boolean incrementVersion )
133+ throws Exception {
134+ long version = incrementVersion ? versionCounter .incrementAndGet () : versionCounter .get ();
114135
115136 FileSettingsService fileSettingsService = internalCluster ().getInstance (FileSettingsService .class , node );
116137
@@ -124,6 +145,15 @@ public static void writeJSONFile(String node, String json, AtomicLong versionCou
124145 logger .info ("--> After writing new settings file: [{}]" , settingsFileContent );
125146 }
126147
148+ public static void writeJSONFile (String node , String json , AtomicLong versionCounter , Logger logger ) throws Exception {
149+ writeJSONFile (node , json , versionCounter , logger , true );
150+ }
151+
152+ public static void writeJSONFileWithoutVersionIncrement (String node , String json , AtomicLong versionCounter , Logger logger )
153+ throws Exception {
154+ writeJSONFile (node , json , versionCounter , logger , false );
155+ }
156+
127157 private Tuple <CountDownLatch , AtomicLong > setupCleanupClusterStateListener (String node ) {
128158 ClusterService clusterService = internalCluster ().clusterService (node );
129159 CountDownLatch savedClusterState = new CountDownLatch (1 );
@@ -171,7 +201,10 @@ public void clusterChanged(ClusterChangedEvent event) {
171201 private void assertClusterStateSaveOK (CountDownLatch savedClusterState , AtomicLong metadataVersion , String expectedBytesPerSec )
172202 throws Exception {
173203 assertTrue (savedClusterState .await (20 , TimeUnit .SECONDS ));
204+ assertExpectedRecoveryBytesSettingAndVersion (metadataVersion , expectedBytesPerSec );
205+ }
174206
207+ private static void assertExpectedRecoveryBytesSettingAndVersion (AtomicLong metadataVersion , String expectedBytesPerSec ) {
175208 final ClusterStateResponse clusterStateResponse = clusterAdmin ().state (
176209 new ClusterStateRequest (TEST_REQUEST_TIMEOUT ).waitForMetadataVersion (metadataVersion .get ())
177210 ).actionGet ();
@@ -337,6 +370,77 @@ public void testErrorSaved() throws Exception {
337370 assertClusterStateNotSaved (savedClusterState .v1 (), savedClusterState .v2 ());
338371 }
339372
373+ public void testErrorCanRecoverOnRestart () throws Exception {
374+ internalCluster ().setBootstrapMasterNodeIndex (0 );
375+ logger .info ("--> start data node / non master node" );
376+ String dataNode = internalCluster ().startNode (Settings .builder ().put (dataOnlyNode ()).put ("discovery.initial_state_timeout" , "1s" ));
377+ FileSettingsService dataFileSettingsService = internalCluster ().getInstance (FileSettingsService .class , dataNode );
378+
379+ assertFalse (dataFileSettingsService .watching ());
380+
381+ logger .info ("--> start master node" );
382+ final String masterNode = internalCluster ().startMasterOnlyNode (
383+ Settings .builder ().put (INITIAL_STATE_TIMEOUT_SETTING .getKey (), "0s" ).build ()
384+ );
385+ assertMasterNode (internalCluster ().nonMasterClient (), masterNode );
386+ var savedClusterState = setupClusterStateListenerForError (masterNode );
387+
388+ FileSettingsService masterFileSettingsService = internalCluster ().getInstance (FileSettingsService .class , masterNode );
389+
390+ assertTrue (masterFileSettingsService .watching ());
391+ assertFalse (dataFileSettingsService .watching ());
392+
393+ writeJSONFile (masterNode , testErrorJSON , versionCounter , logger );
394+ AtomicLong metadataVersion = savedClusterState .v2 ();
395+ assertClusterStateNotSaved (savedClusterState .v1 (), metadataVersion );
396+ assertHasErrors (metadataVersion , "not_cluster_settings" );
397+
398+ // write valid json without version increment to simulate ES being able to process settings after a restart (usually, this would be
399+ // due to a code change)
400+ writeJSONFileWithoutVersionIncrement (masterNode , testJSON , versionCounter , logger );
401+ internalCluster ().restartNode (masterNode );
402+ ensureGreen ();
403+
404+ // we don't know the exact metadata version to wait for so rely on an assertBusy instead
405+ assertBusy (() -> assertExpectedRecoveryBytesSettingAndVersion (metadataVersion , "50mb" ));
406+ assertBusy (() -> assertNoErrors (metadataVersion ));
407+ }
408+
409+ public void testNewErrorOnRestartReprocessing () throws Exception {
410+ internalCluster ().setBootstrapMasterNodeIndex (0 );
411+ logger .info ("--> start data node / non master node" );
412+ String dataNode = internalCluster ().startNode (Settings .builder ().put (dataOnlyNode ()).put ("discovery.initial_state_timeout" , "1s" ));
413+ FileSettingsService dataFileSettingsService = internalCluster ().getInstance (FileSettingsService .class , dataNode );
414+
415+ assertFalse (dataFileSettingsService .watching ());
416+
417+ logger .info ("--> start master node" );
418+ final String masterNode = internalCluster ().startMasterOnlyNode (
419+ Settings .builder ().put (INITIAL_STATE_TIMEOUT_SETTING .getKey (), "0s" ).build ()
420+ );
421+ assertMasterNode (internalCluster ().nonMasterClient (), masterNode );
422+ var savedClusterState = setupClusterStateListenerForError (masterNode );
423+
424+ FileSettingsService masterFileSettingsService = internalCluster ().getInstance (FileSettingsService .class , masterNode );
425+
426+ assertTrue (masterFileSettingsService .watching ());
427+ assertFalse (dataFileSettingsService .watching ());
428+
429+ writeJSONFile (masterNode , testErrorJSON , versionCounter , logger );
430+ AtomicLong metadataVersion = savedClusterState .v2 ();
431+ assertClusterStateNotSaved (savedClusterState .v1 (), metadataVersion );
432+ assertHasErrors (metadataVersion , "not_cluster_settings" );
433+
434+ // write json with new error without version increment to simulate ES failing to process settings after a restart for a new reason
435+ // (usually, this would be due to a code change)
436+ writeJSONFileWithoutVersionIncrement (masterNode , testOtherErrorJSON , versionCounter , logger );
437+ assertHasErrors (metadataVersion , "not_cluster_settings" );
438+ internalCluster ().restartNode (masterNode );
439+ ensureGreen ();
440+
441+ assertBusy (() -> assertHasErrors (metadataVersion , "bad_cluster_settings" ));
442+ }
443+
340444 public void testSettingsAppliedOnMasterReElection () throws Exception {
341445 internalCluster ().setBootstrapMasterNodeIndex (0 );
342446 logger .info ("--> start master node" );
@@ -383,4 +487,21 @@ public void testSettingsAppliedOnMasterReElection() throws Exception {
383487 assertClusterStateSaveOK (savedClusterState .v1 (), savedClusterState .v2 (), "43mb" );
384488 }
385489
490+ private void assertHasErrors (AtomicLong waitForMetadataVersion , String expectedError ) {
491+ var errorMetadata = getErrorMetadata (waitForMetadataVersion );
492+ assertThat (errorMetadata , is (notNullValue ()));
493+ assertThat (errorMetadata .errors (), containsInAnyOrder (containsString (expectedError )));
494+ }
495+
496+ private void assertNoErrors (AtomicLong waitForMetadataVersion ) {
497+ var errorMetadata = getErrorMetadata (waitForMetadataVersion );
498+ assertThat (errorMetadata , is (nullValue ()));
499+ }
500+
501+ private ReservedStateErrorMetadata getErrorMetadata (AtomicLong waitForMetadataVersion ) {
502+ final ClusterStateResponse clusterStateResponse = clusterAdmin ().state (
503+ new ClusterStateRequest (TEST_REQUEST_TIMEOUT ).waitForMetadataVersion (waitForMetadataVersion .get ())
504+ ).actionGet ();
505+ return clusterStateResponse .getState ().getMetadata ().reservedStateMetadata ().get (FileSettingsService .NAMESPACE ).errorMetadata ();
506+ }
386507}
0 commit comments