Commit 82ac850

Fixing the conditions for fetching remote master history (#89472) (#90040)
Fixing the conditions that the health API uses to determine when to check with a master node for its view of master history if the master appears to have gone null repeatedly.
1 parent 70585a8 commit 82ac850

File tree

3 files changed: +19 −9 lines

docs/changelog/89472.yaml

Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+pr: 89472
+summary: Fixing the conditions for fetching remote master history
+area: Health
+type: bug
+issues:
+ - 89431

server/src/internalClusterTest/java/org/elasticsearch/discovery/StableMasterDisruptionIT.java

Lines changed: 4 additions & 8 deletions

@@ -426,28 +426,24 @@ public void testRepeatedNullMasterRecognizedAsGreenIfMasterDoesNotKnowItIsUnstab
                 .put(CoordinationDiagnosticsService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 1)
                 .build()
         );
+        int nullTransitionsThreshold = 1;
         final List<String> dataNodes = internalCluster().startDataOnlyNodes(
             2,
             Settings.builder()
                 .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s")
                 .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s")
-                .put(CoordinationDiagnosticsService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 1)
+                .put(CoordinationDiagnosticsService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), nullTransitionsThreshold)
                 .put(CoordinationDiagnosticsService.NODE_HAS_MASTER_LOOKUP_TIMEFRAME_SETTING.getKey(), new TimeValue(60, TimeUnit.SECONDS))
                 .build()
         );
         ensureStableCluster(3);
-        for (int i = 0; i < 2; i++) {
+        for (int i = 0; i < nullTransitionsThreshold + 1; i++) {
             final String masterNode = masterNodes.get(0);

             // Simulating a painful gc by suspending all threads for a long time on the current elected master node.
             SingleNodeDisruption masterNodeDisruption = new LongGCDisruption(random(), masterNode);

             final CountDownLatch dataNodeMasterSteppedDown = new CountDownLatch(2);
-            internalCluster().getInstance(ClusterService.class, masterNode).addListener(event -> {
-                if (event.state().nodes().getMasterNodeId() == null) {
-                    dataNodeMasterSteppedDown.countDown();
-                }
-            });
             internalCluster().getInstance(ClusterService.class, dataNodes.get(0)).addListener(event -> {
                 if (event.state().nodes().getMasterNodeId() == null) {
                     dataNodeMasterSteppedDown.countDown();

@@ -466,7 +462,7 @@ public void testRepeatedNullMasterRecognizedAsGreenIfMasterDoesNotKnowItIsUnstab
             // Stop disruption
             logger.info("--> unfreezing node [{}]", masterNode);
             masterNodeDisruption.stopDisrupting();
-            ensureStableCluster(3);
+            ensureStableCluster(3, TimeValue.timeValueSeconds(30), false, randomFrom(dataNodes));
         }
         assertGreenMasterStability(internalCluster().client(randomFrom(dataNodes)));
     }
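
Note that the test synchronizes on a CountDownLatch that each watching node decrements when it observes the master go null, proceeding only once all counts arrive; with the master-node listener removed, the counts come from data-node listeners only. A minimal plain-Java illustration of that coordination pattern (the class, thread bodies, and node ids here are hypothetical stand-ins; the real test registers ClusterStateListeners via internalCluster()):

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class LatchPatternSketch {
    public static void main(String[] args) throws InterruptedException {
        // One count per observer that must see the master step down
        // (two data nodes in the test above).
        CountDownLatch masterSteppedDown = new CountDownLatch(2);

        // Stand-ins for the data-node cluster-state listeners.
        for (int node = 0; node < 2; node++) {
            final int id = node;
            new Thread(() -> {
                // ...observe cluster state; when the master id becomes null:
                System.out.println("data node " + id + " saw master go null");
                masterSteppedDown.countDown();
            }).start();
        }

        // Proceed only once every observer has seen the step-down, with a timeout.
        if (masterSteppedDown.await(30, TimeUnit.SECONDS) == false) {
            throw new AssertionError("master did not step down in time");
        }
        System.out.println("all data nodes observed the step-down");
    }
}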

server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationDiagnosticsService.java

Lines changed: 9 additions & 1 deletion

@@ -442,8 +442,16 @@ private boolean hasSeenMasterInHasMasterLookupTimeframe() {
     public void clusterChanged(ClusterChangedEvent event) {
         DiscoveryNode currentMaster = event.state().nodes().getMasterNode();
         DiscoveryNode previousMaster = event.previousState().nodes().getMasterNode();
-        if (currentMaster == null && previousMaster != null) {
+        if ((currentMaster == null && previousMaster != null) || (currentMaster != null && previousMaster == null)) {
             if (masterHistoryService.getLocalMasterHistory().hasMasterGoneNullAtLeastNTimes(unacceptableNullTransitions)) {
+                /*
+                 * If the master node has been going to null repeatedly, we want to make a remote request to it to see what it thinks of
+                 * master stability. We want to query the most recent master whether the current master has just transitioned to null or
+                 * just transitioned from null to not null. The reason that we make the latter request is that sometimes when the elected
+                 * master goes to null the most recent master is not responsive for the duration of the request timeout (for example if
+                 * that node is in the middle of a long GC pause which would be both the reason for it not being master and the reason it
+                 * does not respond quickly to transport requests).
+                 */
                 DiscoveryNode master = masterHistoryService.getLocalMasterHistory().getMostRecentNonNullMaster();
                 /*
                  * If the most recent master was this box, there is no point in making a transport request -- we already know what this
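
To make the broadened trigger condition concrete, here is a minimal, self-contained sketch of the transition logic (all names are simplified, hypothetical stand-ins: the real CoordinationDiagnosticsService consults MasterHistoryService and a time-bounded null-transition count rather than a bare counter, and issues a transport request instead of printing):

public final class MasterTransitionSketch {
    // Stand-in for the unacceptableNullTransitions threshold in the patch.
    private static final int UNACCEPTABLE_NULL_TRANSITIONS = 1;

    private int nullTransitions = 0;        // how many times the master has gone null
    private String mostRecentNonNullMaster; // last node observed as elected master

    // Called with the master node id before and after a cluster state change;
    // null means "no elected master".
    public void onClusterChanged(String previousMaster, String currentMaster) {
        boolean wentNull = currentMaster == null && previousMaster != null;
        boolean cameBack = currentMaster != null && previousMaster == null;
        if (currentMaster != null) {
            mostRecentNonNullMaster = currentMaster;
        }
        if (wentNull) {
            nullTransitions++;
        }
        // Before the fix, only `wentNull` triggered the remote check; the fix
        // also triggers on `cameBack`, since the departing master is often
        // unresponsive (e.g. mid-GC) at the moment it goes null.
        if ((wentNull || cameBack) && nullTransitions >= UNACCEPTABLE_NULL_TRANSITIONS) {
            System.out.println("would fetch remote master history from " + mostRecentNonNullMaster);
        }
    }

    public static void main(String[] args) {
        MasterTransitionSketch sketch = new MasterTransitionSketch();
        sketch.onClusterChanged(null, "node-1"); // initial election: no null transitions yet, nothing fires
        sketch.onClusterChanged("node-1", null); // master lost: first null transition, remote check fires
        sketch.onClusterChanged(null, "node-2"); // re-election: with this fix, the check fires here too
    }
}

The last call in main is the case this commit adds: querying on the null-to-non-null transition gives the health API a second chance to reach a master that was unreachable while it was stepping down.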
