|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the "Elastic License |
| 4 | + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side |
| 5 | + * Public License v 1"; you may not use this file except in compliance with, at |
| 6 | + * your election, the "Elastic License 2.0", the "GNU Affero General Public |
| 7 | + * License v3.0 only", or the "Server Side Public License, v 1". |
| 8 | + */ |
| 9 | + |
| 10 | +package org.elasticsearch.cluster.routing.allocation.decider; |
| 11 | + |
| 12 | +import org.apache.logging.log4j.LogManager; |
| 13 | +import org.apache.logging.log4j.Logger; |
| 14 | +import org.elasticsearch.cluster.metadata.IndexMetadata; |
| 15 | +import org.elasticsearch.cluster.metadata.ProjectId; |
| 16 | +import org.elasticsearch.cluster.node.DiscoveryNode; |
| 17 | +import org.elasticsearch.cluster.node.DiscoveryNodeFilters; |
| 18 | +import org.elasticsearch.cluster.node.DiscoveryNodeRole; |
| 19 | +import org.elasticsearch.cluster.routing.RoutingNode; |
| 20 | +import org.elasticsearch.cluster.routing.ShardRouting; |
| 21 | +import org.elasticsearch.cluster.routing.allocation.IndexBalanceConstraintSettings; |
| 22 | +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; |
| 23 | +import org.elasticsearch.common.settings.ClusterSettings; |
| 24 | +import org.elasticsearch.common.settings.Settings; |
| 25 | +import org.elasticsearch.core.Strings; |
| 26 | +import org.elasticsearch.index.Index; |
| 27 | + |
| 28 | +import java.util.HashSet; |
| 29 | +import java.util.List; |
| 30 | +import java.util.Map; |
| 31 | +import java.util.Set; |
| 32 | + |
| 33 | +import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.AND; |
| 34 | +import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.OpType.OR; |
| 35 | +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.INDEX_ROLE; |
| 36 | +import static org.elasticsearch.cluster.node.DiscoveryNodeRole.SEARCH_ROLE; |
| 37 | +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING; |
| 38 | +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_INCLUDE_GROUP_SETTING; |
| 39 | +import static org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider.CLUSTER_ROUTING_REQUIRE_GROUP_SETTING; |
| 40 | + |
| 41 | +/** |
| 42 | + * For an index of n shards hosted by a cluster of m nodes, a node should not host |
| 43 | + * significantly more than n / m shards. This allocation decider enforces this principle. |
| 44 | + * This allocation decider excludes any nodes flagged for shutdown from consideration |
| 45 | + * when computing optimal shard distributions. |
| 46 | + */ |
| 47 | +public class IndexBalanceAllocationDecider extends AllocationDecider { |
| 48 | + |
| 49 | + private static final Logger logger = LogManager.getLogger(IndexBalanceAllocationDecider.class); |
| 50 | + private static final String EMPTY = ""; |
| 51 | + |
| 52 | + public static final String NAME = "index_balance"; |
| 53 | + |
| 54 | + private final IndexBalanceConstraintSettings indexBalanceConstraintSettings; |
| 55 | + private final boolean isStateless; |
| 56 | + |
| 57 | + private volatile DiscoveryNodeFilters clusterRequireFilters; |
| 58 | + private volatile DiscoveryNodeFilters clusterIncludeFilters; |
| 59 | + private volatile DiscoveryNodeFilters clusterExcludeFilters; |
| 60 | + |
| 61 | + public IndexBalanceAllocationDecider(Settings settings, ClusterSettings clusterSettings) { |
| 62 | + this.indexBalanceConstraintSettings = new IndexBalanceConstraintSettings(clusterSettings); |
| 63 | + setClusterRequireFilters(CLUSTER_ROUTING_REQUIRE_GROUP_SETTING.getAsMap(settings)); |
| 64 | + setClusterExcludeFilters(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING.getAsMap(settings)); |
| 65 | + setClusterIncludeFilters(CLUSTER_ROUTING_INCLUDE_GROUP_SETTING.getAsMap(settings)); |
| 66 | + clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_REQUIRE_GROUP_SETTING, this::setClusterRequireFilters, (a, b) -> {}); |
| 67 | + clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING, this::setClusterExcludeFilters, (a, b) -> {}); |
| 68 | + clusterSettings.addAffixMapUpdateConsumer(CLUSTER_ROUTING_INCLUDE_GROUP_SETTING, this::setClusterIncludeFilters, (a, b) -> {}); |
| 69 | + isStateless = DiscoveryNode.isStateless(settings); |
| 70 | + } |
| 71 | + |
| 72 | + @Override |
| 73 | + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { |
| 74 | + if (indexBalanceConstraintSettings.isDeciderEnabled() == false || isStateless == false || hasFilters()) { |
| 75 | + return allocation.decision(Decision.YES, NAME, "Decider is disabled."); |
| 76 | + } |
| 77 | + |
| 78 | + Index index = shardRouting.index(); |
| 79 | + if (node.hasIndex(index) == false) { |
| 80 | + return allocation.decision(Decision.YES, NAME, "Node does not currently host this index."); |
| 81 | + } |
| 82 | + |
| 83 | + assert node.node() != null; |
| 84 | + assert node.node().getRoles().contains(INDEX_ROLE) || node.node().getRoles().contains(SEARCH_ROLE); |
| 85 | + |
| 86 | + if (node.node().getRoles().contains(INDEX_ROLE) && shardRouting.primary() == false) { |
| 87 | + return allocation.decision(Decision.YES, NAME, "An index node cannot own search shards. Decider inactive."); |
| 88 | + } |
| 89 | + |
| 90 | + if (node.node().getRoles().contains(SEARCH_ROLE) && shardRouting.primary()) { |
| 91 | + return allocation.decision(Decision.YES, NAME, "A search node cannot own primary shards. Decider inactive."); |
| 92 | + } |
| 93 | + |
| 94 | + final ProjectId projectId = allocation.getClusterState().metadata().projectFor(index).id(); |
| 95 | + final Set<DiscoveryNode> eligibleNodes = new HashSet<>(); |
| 96 | + int totalShards = 0; |
| 97 | + String nomenclature = EMPTY; |
| 98 | + |
| 99 | + if (node.node().getRoles().contains(INDEX_ROLE)) { |
| 100 | + collectEligibleNodes(allocation, eligibleNodes, INDEX_ROLE); |
| 101 | + // Primary shards only. |
| 102 | + totalShards = allocation.getClusterState().routingTable(projectId).index(index).size(); |
| 103 | + nomenclature = "index"; |
| 104 | + } else if (node.node().getRoles().contains(SEARCH_ROLE)) { |
| 105 | + collectEligibleNodes(allocation, eligibleNodes, SEARCH_ROLE); |
| 106 | + // Replicas only. |
| 107 | + final IndexMetadata indexMetadata = allocation.getClusterState().metadata().getProject(projectId).index(index); |
| 108 | + totalShards = indexMetadata.getNumberOfShards() * indexMetadata.getNumberOfReplicas(); |
| 109 | + nomenclature = "search"; |
| 110 | + } |
| 111 | + |
| 112 | + assert eligibleNodes.isEmpty() == false; |
| 113 | + if (eligibleNodes.isEmpty()) { |
| 114 | + return allocation.decision(Decision.YES, NAME, "There are no eligible nodes available."); |
| 115 | + } |
| 116 | + assert totalShards > 0; |
| 117 | + final double idealAllocation = Math.ceil((double) totalShards / eligibleNodes.size()); |
| 118 | + |
| 119 | + // Adding the excess shards before division ensures that with tolerance 1 we get: |
| 120 | + // 2 shards, 2 nodes, allow 2 on each |
| 121 | + // 3 shards, 2 nodes, allow 2 on each etc. |
| 122 | + final int threshold = Math.ceilDiv(totalShards + indexBalanceConstraintSettings.getExcessShards(), eligibleNodes.size()); |
| 123 | + final int currentAllocation = node.numberOfOwningShardsForIndex(index); |
| 124 | + |
| 125 | + if (currentAllocation >= threshold) { |
| 126 | + String explanation = Strings.format( |
| 127 | + "There are [%d] eligible nodes in the [%s] tier for assignment of [%d] shards in index [%s]. Ideally no more than [%.0f] " |
| 128 | + + "shard would be assigned per node (the index balance excess shards setting is [%d]). This node is already assigned" |
| 129 | + + " [%d] shards of the index.", |
| 130 | + eligibleNodes.size(), |
| 131 | + nomenclature, |
| 132 | + totalShards, |
| 133 | + index, |
| 134 | + idealAllocation, |
| 135 | + indexBalanceConstraintSettings.getExcessShards(), |
| 136 | + currentAllocation |
| 137 | + ); |
| 138 | + |
| 139 | + logger.trace(explanation); |
| 140 | + |
| 141 | + return allocation.decision(Decision.NOT_PREFERRED, NAME, explanation); |
| 142 | + } |
| 143 | + |
| 144 | + return allocation.decision(Decision.YES, NAME, "Node index shard allocation is under the threshold."); |
| 145 | + } |
| 146 | + |
| 147 | + private void collectEligibleNodes(RoutingAllocation allocation, Set<DiscoveryNode> eligibleNodes, DiscoveryNodeRole role) { |
| 148 | + for (DiscoveryNode discoveryNode : allocation.nodes()) { |
| 149 | + if (discoveryNode.getRoles().contains(role) && allocation.metadata().nodeShutdowns().contains(discoveryNode.getId()) == false) { |
| 150 | + eligibleNodes.add(discoveryNode); |
| 151 | + } |
| 152 | + } |
| 153 | + } |
| 154 | + |
| 155 | + private void setClusterRequireFilters(Map<String, List<String>> filters) { |
| 156 | + clusterRequireFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(AND, filters)); |
| 157 | + } |
| 158 | + |
| 159 | + private void setClusterIncludeFilters(Map<String, List<String>> filters) { |
| 160 | + clusterIncludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); |
| 161 | + } |
| 162 | + |
| 163 | + private void setClusterExcludeFilters(Map<String, List<String>> filters) { |
| 164 | + clusterExcludeFilters = DiscoveryNodeFilters.trimTier(DiscoveryNodeFilters.buildFromKeyValues(OR, filters)); |
| 165 | + } |
| 166 | + |
| 167 | + private boolean hasFilters() { |
| 168 | + return (clusterExcludeFilters != null && clusterExcludeFilters.hasFilters()) |
| 169 | + || (clusterIncludeFilters != null && clusterIncludeFilters.hasFilters()) |
| 170 | + || (clusterRequireFilters != null && clusterRequireFilters.hasFilters()); |
| 171 | + } |
| 172 | +} |
0 commit comments