Add delayed allocation diagnosis case to shards availability indicator (#89056) (#90018)

jbaiera · web-flow · commit 9fce22d3378c · 2022-09-13T04:49:27.000+09:30
This PR adds diagnosis logic to the shards availability health indicator that detects when a shard allocation 
is delayed. This usually happens when a node that a shard is allocated to disappears. It is often better to 
delay the recovery of a shard in case the node that hosts it comes back. Shards that are delayed in this 
manner have special flags set on their unassigned info that denote a delayed allocation.

This change adds a diagnosis to the indicator that identifies these delayed shards and provides guidance 
stating that they will eventually allocate on their own once the delay elapses, but if allocation is required 
immediately, an index setting can be updated to perform the allocation.

This PR also includes some light integration testing to ensure that more unassigned cases are covered by 
the indicator.
diff --git a/docs/changelog/89056.yaml b/docs/changelog/89056.yaml
@@ -0,0 +1,5 @@
+pr: 89056
+summary: Add delayed allocation diagnosis case to shards availability indicator
+area: Health
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityHealthIndicatorService.java
@@ -147,6 +147,16 @@ public HealthIndicatorResult calculate(boolean explain) {
         DIAGNOSE_SHARDS_ACTION_GUIDE
     );
 
+    /** Help link passed as the last argument of {@link #DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS}. */
+    public static final String FIX_DELAYED_SHARDS_GUIDE = "http://ela.st/fix-delayed-shard-allocation";
+    /**
+     * Diagnosis added for unassigned shards whose allocation is delayed: either the allocation status is
+     * {@code DELAYED_ALLOCATION}, or it is {@code NO_ATTEMPT} while the shard's unassigned info reports {@code isDelayed()}.
+     * <p>
+     * The arguments are presumably (id, cause, action, help URL) - confirm against {@code Diagnosis.Definition}.
+     * NOTE(review): the constant name says "wait for OR fix", but the action text below only tells the user to wait; any
+     * guidance on forcing the allocation is presumably left to the linked guide - confirm that this is intended.
+     */
+    public static final Diagnosis.Definition DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS = new Diagnosis.Definition(
+        "delayed_shard_allocations",
+        "Elasticsearch is not allocating some shards because they are marked for delayed allocation. Shards that have become "
+            + "unavailable are usually marked for delayed allocation because it is more efficient to wait and see if the shards return "
+            + "on their own than to recover the shard immediately.",
+        "Elasticsearch will reallocate the shards when the delay has elapsed. No action is required by the user.",
+        FIX_DELAYED_SHARDS_GUIDE
+    );
+
     public static final String ENABLE_INDEX_ALLOCATION_GUIDE = "http://ela.st/fix-index-allocation";
     public static final Diagnosis.Definition ACTION_ENABLE_INDEX_ROUTING_ALLOCATION = new Diagnosis.Definition(
         "enable_index_allocations",
@@ -413,10 +423,18 @@ List<Diagnosis.Definition> diagnoseUnassignedShardRouting(ShardRouting shardRout
                     actions.add(ACTION_RESTORE_FROM_SNAPSHOT);
                 }
                 break;
+            case NO_ATTEMPT:
+                if (shardRouting.unassignedInfo().isDelayed()) {
+                    actions.add(DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS);
+                } else {
+                    actions.addAll(explainAllocationsAndDiagnoseDeciders(shardRouting, state));
+                }
+                break;
             case DECIDERS_NO:
                 actions.addAll(explainAllocationsAndDiagnoseDeciders(shardRouting, state));
                 break;
-            default:
+            case DELAYED_ALLOCATION:
+                actions.add(DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS);
                 break;
         }
         if (actions.isEmpty()) {
diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardsAvailabilityHealthIndicatorServiceTests.java
@@ -73,6 +73,7 @@
 import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.ACTION_MIGRATE_TIERS_AWAY_FROM_INCLUDE_DATA_LOOKUP;
 import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.ACTION_MIGRATE_TIERS_AWAY_FROM_REQUIRE_DATA_LOOKUP;
 import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.ACTION_RESTORE_FROM_SNAPSHOT;
+import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS;
 import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.NAME;
 import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorServiceTests.ShardState.AVAILABLE;
 import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorServiceTests.ShardState.INITIALIZING;
@@ -386,7 +387,7 @@ public void testShouldBeYellowWhenRestartingReplicasReachedAllocationDelay() {
                             List.of(ImpactArea.SEARCH)
                         )
                     ),
-                    List.of(new Diagnosis(ACTION_CHECK_ALLOCATION_EXPLAIN_API, List.of("restarting-index")))
+                    List.of(new Diagnosis(DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS, List.of("restarting-index")))
                 )
             )
         );
@@ -460,7 +461,7 @@ public void testShouldBeRedWhenRestartingPrimariesReachedAllocationDelayAndNoRep
                             List.of(ImpactArea.INGEST, ImpactArea.SEARCH)
                         )
                     ),
-                    List.of(new Diagnosis(ACTION_CHECK_ALLOCATION_EXPLAIN_API, List.of("restarting-index")))
+                    List.of(new Diagnosis(DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS, List.of("restarting-index")))
                 )
             )
         );
diff --git a/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/xpack/cluster/routing/allocation/DataTierShardAvailabilityHealthIndicatorIT.java b/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/xpack/cluster/routing/allocation/DataTierShardAvailabilityHealthIndicatorIT.java
@@ -0,0 +1,197 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.cluster.routing.allocation;
+
+import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplanation;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.cluster.node.DiscoveryNodeRole;
+import org.elasticsearch.cluster.routing.RoutingNodesHelper;
+import org.elasticsearch.cluster.routing.ShardRouting;
+import org.elasticsearch.cluster.routing.ShardRoutingState;
+import org.elasticsearch.cluster.routing.UnassignedInfo;
+import org.elasticsearch.cluster.routing.allocation.DataTier;
+import org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.health.Diagnosis;
+import org.elasticsearch.health.GetHealthAction;
+import org.elasticsearch.health.HealthIndicatorResult;
+import org.elasticsearch.health.HealthStatus;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.elasticsearch.test.NodeRoles.onlyRole;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasItem;
+
+/**
+ * Contains all integration tests for the {@link org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService}
+ * that require the data tiers allocation decider logic.
+ */
+@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
+public class DataTierShardAvailabilityHealthIndicatorIT extends ESIntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return List.of(LocalStateCompositeXPackPlugin.class);
+    }
+
+    /**
+     * Starts one master-only node and a single hot-tier data node, creates a one-shard/one-replica index that prefers the hot tier,
+     * and checks that the shards availability indicator is YELLOW and carries the "increase tier capacity" diagnosis for the hot
+     * tier: there are not enough nodes in the tier to host the desired replicas on unique nodes.
+     */
+    public void testIncreaseTierCapacityDiagnosisWhenCreated() throws Exception {
+        internalCluster().startMasterOnlyNodes(1);
+        internalCluster().startNodes(1, onlyRole(DiscoveryNodeRole.DATA_HOT_NODE_ROLE));
+
+        final Settings.Builder indexSettings = Settings.builder()
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
+            .put(DataTier.TIER_PREFERENCE, DataTier.DATA_HOT);
+        ElasticsearchAssertions.assertAcked(prepareCreate("test").setSettings(indexSettings));
+        ensureYellow("test");
+
+        // Ask the health API for just this indicator; the second constructor argument is passed as true, as before.
+        final GetHealthAction.Request request = new GetHealthAction.Request(ShardsAvailabilityHealthIndicatorService.NAME, true);
+        final GetHealthAction.Response healthResponse = client().execute(GetHealthAction.INSTANCE, request).get();
+
+        final HealthIndicatorResult result = healthResponse.findIndicator(ShardsAvailabilityHealthIndicatorService.NAME);
+        assertThat(result.status(), equalTo(HealthStatus.YELLOW));
+
+        final Diagnosis expectedDiagnosis = new Diagnosis(
+            ShardsAvailabilityHealthIndicatorService.ACTION_INCREASE_TIER_CAPACITY_LOOKUP.get(DataTier.DATA_HOT),
+            List.of("test")
+        );
+        assertThat(result.diagnosisList(), hasItem(expectedDiagnosis));
+    }
+
+    /**
+     * Starts two hot-tier data nodes, creates a one-shard/one-replica index with the node-left delayed timeout set to 0, then stops
+     * the node holding the replica. Checks that the shards availability indicator is YELLOW and carries the "increase tier capacity"
+     * diagnosis for the hot tier, since the tier can no longer host all of the index's replicas on unique nodes.
+     */
+    public void testIncreaseTierCapacityDiagnosisWhenTierShrinksUnexpectedly() throws Exception {
+        internalCluster().startMasterOnlyNodes(1);
+        internalCluster().startNodes(2, onlyRole(DiscoveryNodeRole.DATA_HOT_NODE_ROLE));
+
+        final Settings.Builder indexSettings = Settings.builder()
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
+            .put(DataTier.TIER_PREFERENCE, DataTier.DATA_HOT)
+            .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), 0);
+        ElasticsearchAssertions.assertAcked(prepareCreate("test").setSettings(indexSettings));
+        ensureGreen("test");
+        indexRandomData("test");
+
+        final String replicaNode = findNodeWithReplicaShard("test", 0);
+        internalCluster().stopNode(replicaNode);
+        ensureYellow("test");
+
+        final GetHealthAction.Request request = new GetHealthAction.Request(ShardsAvailabilityHealthIndicatorService.NAME, true);
+        final GetHealthAction.Response healthResponse = client().execute(GetHealthAction.INSTANCE, request).get();
+
+        // Log the allocation explanation of the replica purely as a debugging aid; nothing below asserts on it.
+        final ClusterAllocationExplanation replicaExplanation = client().admin()
+            .cluster()
+            .prepareAllocationExplain()
+            .setIndex("test")
+            .setShard(0)
+            .setPrimary(false)
+            .get()
+            .getExplanation();
+        logger.info(XContentHelper.toXContent(replicaExplanation, XContentType.JSON, true).utf8ToString());
+
+        final HealthIndicatorResult result = healthResponse.findIndicator(ShardsAvailabilityHealthIndicatorService.NAME);
+        assertThat(result.status(), equalTo(HealthStatus.YELLOW));
+
+        final Diagnosis expectedDiagnosis = new Diagnosis(
+            ShardsAvailabilityHealthIndicatorService.ACTION_INCREASE_TIER_CAPACITY_LOOKUP.get(DataTier.DATA_HOT),
+            List.of("test")
+        );
+        assertThat(result.diagnosisList(), hasItem(expectedDiagnosis));
+    }
+
+    /**
+     * Starts three hot-tier data nodes, creates a one-shard/one-replica index with a 30 minute node-left delayed timeout, then stops
+     * the node holding the primary. Checks that the shards availability indicator is YELLOW and that its only diagnosis is the
+     * delayed shard allocation one.
+     */
+    public void testRemovingNodeReturnsYellowForDelayedIndex() throws Exception {
+        internalCluster().startMasterOnlyNodes(1);
+        internalCluster().startNodes(3, onlyRole(DiscoveryNodeRole.DATA_HOT_NODE_ROLE));
+
+        final Settings.Builder indexSettings = Settings.builder()
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
+            .put(DataTier.TIER_PREFERENCE, DataTier.DATA_HOT)
+            .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), TimeValue.timeValueMinutes(30));
+        ElasticsearchAssertions.assertAcked(prepareCreate("test").setSettings(indexSettings));
+        ensureGreen("test");
+        indexRandomData("test");
+
+        final String primaryNode = findNodeWithPrimaryShard("test", 0);
+        internalCluster().stopNode(primaryNode);
+        ensureYellow("test");
+
+        final GetHealthAction.Request request = new GetHealthAction.Request(ShardsAvailabilityHealthIndicatorService.NAME, true);
+        final GetHealthAction.Response healthResponse = client().execute(GetHealthAction.INSTANCE, request).get();
+
+        final HealthIndicatorResult result = healthResponse.findIndicator(ShardsAvailabilityHealthIndicatorService.NAME);
+        assertThat(result.status(), equalTo(HealthStatus.YELLOW));
+        assertThat(result.diagnosisList().size(), equalTo(1));
+
+        final Diagnosis expectedDiagnosis = new Diagnosis(
+            ShardsAvailabilityHealthIndicatorService.DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS,
+            List.of("test")
+        );
+        assertThat(result.diagnosisList(), hasItem(expectedDiagnosis));
+    }
+
+    private void indexRandomData(String indexName) throws Exception {
+        int numDocs = scaledRandomIntBetween(100, 1000);
+        IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
+        for (int i = 0; i < builders.length; i++) {
+            builders[i] = client().prepareIndex(indexName).setSource("field", "value");
+        }
+        // we want to test both full divergent copies of the shard in terms of segments, and
+        // a case where they are the same (using sync flush), index Random does all this goodness
+        // already
+        indexRandom(true, builders);
+    }
+
+    private String findNodeWithPrimaryShard(String indexName, int shard) {
+        return findNodeWithShard(indexName, shard, true);
+    }
+
+    private String findNodeWithReplicaShard(String indexName, int shard) {
+        return findNodeWithShard(indexName, shard, false);
+    }
+
+    /**
+     * Picks, at random, a node that holds a STARTED copy of the requested shard in the current cluster state.
+     *
+     * @param indexName name of the index the shard belongs to
+     * @param shard     shard id to look for
+     * @param primary   {@code true} to look for the primary copy, {@code false} for a replica copy
+     * @return the name of a node hosting a matching started shard copy
+     * @throws AssertionError if no started copy matches, so the test fails with a readable message instead of an
+     *                        {@link IndexOutOfBoundsException} from the empty candidate list
+     */
+    private String findNodeWithShard(final String indexName, final int shard, final boolean primary) {
+        ClusterState state = client().admin().cluster().prepareState().get().getState();
+        List<ShardRouting> candidates = RoutingNodesHelper.shardsWithState(state.getRoutingNodes(), ShardRoutingState.STARTED)
+            .stream()
+            .filter(shardRouting -> shardRouting.getIndexName().equals(indexName))
+            .filter(shardRouting -> shard == shardRouting.getId())
+            .filter(shardRouting -> primary == shardRouting.primary())
+            .collect(Collectors.toList());
+        if (candidates.isEmpty()) {
+            throw new AssertionError(
+                "no started " + (primary ? "primary" : "replica") + " copy of [" + indexName + "][" + shard + "] found in cluster state"
+            );
+        }
+        // Shuffle with the test's random source so the choice among several matching copies is random but reproducible.
+        Collections.shuffle(candidates, random());
+        return state.nodes().get(candidates.get(0).currentNodeId()).getName();
+    }
+}

Original file line number	Diff line number	Diff line change
`@@ -73,6 +73,7 @@`
`73`	`73`	`import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.ACTION_MIGRATE_TIERS_AWAY_FROM_INCLUDE_DATA_LOOKUP;`
`74`	`74`	`import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.ACTION_MIGRATE_TIERS_AWAY_FROM_REQUIRE_DATA_LOOKUP;`
`75`	`75`	`import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.ACTION_RESTORE_FROM_SNAPSHOT;`
	`76`	`+import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS;`
`76`	`77`	`import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorService.NAME;`
`77`	`78`	`import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorServiceTests.ShardState.AVAILABLE;`
`78`	`79`	`import static org.elasticsearch.cluster.routing.allocation.ShardsAvailabilityHealthIndicatorServiceTests.ShardState.INITIALIZING;`
`@@ -386,7 +387,7 @@ public void testShouldBeYellowWhenRestartingReplicasReachedAllocationDelay() {`
`386`	`387`	`List.of(ImpactArea.SEARCH)`
`387`	`388`	`)`
`388`	`389`	`),`
`389`		`- List.of(new Diagnosis(ACTION_CHECK_ALLOCATION_EXPLAIN_API, List.of("restarting-index")))`
	`390`	`+ List.of(new Diagnosis(DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS, List.of("restarting-index")))`
`390`	`391`	`)`
`391`	`392`	`)`
`392`	`393`	`);`
`@@ -460,7 +461,7 @@ public void testShouldBeRedWhenRestartingPrimariesReachedAllocationDelayAndNoRep`
`460`	`461`	`List.of(ImpactArea.INGEST, ImpactArea.SEARCH)`
`461`	`462`	`)`
`462`	`463`	`),`
`463`		`- List.of(new Diagnosis(ACTION_CHECK_ALLOCATION_EXPLAIN_API, List.of("restarting-index")))`
	`464`	`+ List.of(new Diagnosis(DIAGNOSIS_WAIT_FOR_OR_FIX_DELAYED_SHARDS, List.of("restarting-index")))`
`464`	`465`	`)`
`465`	`466`	`)`
`466`	`467`	`);`