Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
*/
package org.elasticsearch.index.shard;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.SetOnce;
Expand All @@ -22,13 +24,16 @@
import org.elasticsearch.cluster.EstimatedHeapUsage;
import org.elasticsearch.cluster.EstimatedHeapUsageCollector;
import org.elasticsearch.cluster.InternalClusterInfoService;
import org.elasticsearch.cluster.NodeUsageStatsForThreadPools;
import org.elasticsearch.cluster.NodeUsageStatsForThreadPoolsCollector;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeUtils;
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
Expand Down Expand Up @@ -73,6 +78,7 @@
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.XContentType;
import org.junit.Assert;

Expand All @@ -85,6 +91,7 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand Down Expand Up @@ -117,14 +124,20 @@
import static org.hamcrest.Matchers.either;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.lessThanOrEqualTo;

public class IndexShardIT extends ESSingleNodeTestCase {
private static final Logger logger = LogManager.getLogger(IndexShardIT.class);

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    // Register the bogus collector plugins so the node's ClusterInfoService picks up the
    // test implementations for estimated-heap-usage and thread-pool usage stats collection.
    // NOTE: the scraped diff left the pre-change single-line return in place above the new
    // multi-line one, producing an unreachable duplicate return; only the merged version
    // (three plugins) is kept here.
    return pluginList(
        InternalSettingsPlugin.class,
        BogusEstimatedHeapUsagePlugin.class,
        BogusNodeUsageStatsForThreadPoolsCollectorPlugin.class
    );
}

public void testLockTryingToDelete() throws Exception {
Expand Down Expand Up @@ -295,6 +308,53 @@ public void testHeapUsageEstimateIsPresent() {
}
}

/**
 * Verifies that per-node thread pool usage stats appear in {@link ClusterInfo} only after the
 * write-load decider setting is enabled, and that every node then reports non-negative stats
 * for the WRITE thread pool (values are randomly generated by
 * {@code BogusNodeUsageStatsForThreadPoolsCollector}).
 */
public void testNodeWriteLoadsArePresent() {
InternalClusterInfoService clusterInfoService = (InternalClusterInfoService) getInstanceFromNode(ClusterInfoService.class);
// First refresh happens with the feature still at its default (disabled) state.
ClusterInfoServiceUtils.refresh(clusterInfoService);
Map<String, NodeUsageStatsForThreadPools> nodeThreadPoolStats = clusterInfoService.getClusterInfo()
.getNodeUsageStatsForThreadPools();
assertNotNull(nodeThreadPoolStats);
/** Not collecting stats yet because allocation write load stats collection is disabled by default.
* see {@link WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING} */
assertTrue(nodeThreadPoolStats.isEmpty());

// Enable collection for node write loads.
updateClusterSettings(
Settings.builder()
.put(
WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
)
.build()
);
try {
// Force a ClusterInfo refresh to run collection of the node thread pool usage stats.
ClusterInfoServiceUtils.refresh(clusterInfoService);
nodeThreadPoolStats = clusterInfoService.getClusterInfo().getNodeUsageStatsForThreadPools();

/** Verify that each node has usage stats reported. The test {@link BogusNodeUsageStatsForThreadPoolsCollector} implementation
* generates random usage values */
ClusterState state = getInstanceFromNode(ClusterService.class).state();
assertEquals(state.nodes().size(), nodeThreadPoolStats.size());
for (DiscoveryNode node : state.nodes()) {
assertTrue(nodeThreadPoolStats.containsKey(node.getId()));
NodeUsageStatsForThreadPools nodeUsageStatsForThreadPools = nodeThreadPoolStats.get(node.getId());
assertThat(nodeUsageStatsForThreadPools.nodeId(), equalTo(node.getId()));
NodeUsageStatsForThreadPools.ThreadPoolUsageStats writeThreadPoolStats = nodeUsageStatsForThreadPools
.threadPoolUsageStatsMap()
.get(ThreadPool.Names.WRITE);
assertNotNull(writeThreadPoolStats);
// The bogus collector produces random non-negative values, so only bounds are asserted.
assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThanOrEqualTo(0));
assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThanOrEqualTo(0.0f));
assertThat(writeThreadPoolStats.averageThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
}
} finally {
// Restore the default so this test does not leak the enabled setting into other tests.
updateClusterSettings(
Settings.builder().putNull(WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey()).build()
);
}
}

public void testIndexCanChangeCustomDataPath() throws Exception {
final String index = "test-custom-data-path";
final Path sharedDataPath = getInstanceFromNode(Environment.class).sharedDataDir().resolve(randomAsciiLettersOfLength(10));
Expand Down Expand Up @@ -875,4 +935,61 @@ public ClusterService getClusterService() {
return clusterService.get();
}
}

/**
 * Test-only {@link NodeUsageStatsForThreadPoolsCollector} that fabricates a random
 * {@link NodeUsageStatsForThreadPools} entry for every node currently in the cluster state.
 * <p>
 * Note: there's an 'org.elasticsearch.cluster.NodeUsageStatsForThreadPoolsCollector' file that declares this implementation so that the
 * plugin system can pick it up and use it for the test set-up.
 */
public static class BogusNodeUsageStatsForThreadPoolsCollector implements NodeUsageStatsForThreadPoolsCollector {

    private final BogusNodeUsageStatsForThreadPoolsCollectorPlugin plugin;

    public BogusNodeUsageStatsForThreadPoolsCollector(BogusNodeUsageStatsForThreadPoolsCollectorPlugin plugin) {
        this.plugin = plugin;
    }

    @Override
    public void collectUsageStats(ActionListener<Map<String, NodeUsageStatsForThreadPools>> listener) {
        // Resolve synchronously: build one random stats entry per discovery node, keyed by node id.
        ActionListener.completeWith(listener, () -> {
            Map<String, NodeUsageStatsForThreadPools> statsByNodeId = new HashMap<>();
            for (DiscoveryNode discoveryNode : plugin.getClusterService().state().nodes()) {
                statsByNodeId.put(discoveryNode.getId(), makeRandomNodeUsageStats(discoveryNode.getId()));
            }
            return Map.copyOf(statsByNodeId);
        });
    }

    /** Builds randomly-populated (but non-negative) usage stats for the WRITE thread pool of the given node. */
    private NodeUsageStatsForThreadPools makeRandomNodeUsageStats(String nodeId) {
        NodeUsageStatsForThreadPools.ThreadPoolUsageStats writePoolStats = new NodeUsageStatsForThreadPools.ThreadPoolUsageStats(
            randomNonNegativeInt(),
            randomFloat(),
            randomNonNegativeLong()
        );
        Map<String, NodeUsageStatsForThreadPools.ThreadPoolUsageStats> statsPerPool = new HashMap<>();
        statsPerPool.put(ThreadPool.Names.WRITE, writePoolStats);
        return new NodeUsageStatsForThreadPools(nodeId, statsPerPool);
    }
}

/**
 * Test plugin whose sole purpose is to capture the node's {@link ClusterService} during component
 * creation, so the bogus collector can read the current cluster state.
 */
public static class BogusNodeUsageStatsForThreadPoolsCollectorPlugin extends Plugin implements ClusterPlugin {

    private final SetOnce<ClusterService> clusterServiceRef = new SetOnce<>();

    @Override
    public Collection<?> createComponents(PluginServices services) {
        // Stash the ClusterService exactly once at node start-up; SetOnce rejects a second set.
        clusterServiceRef.set(services.clusterService());
        return List.of();
    }

    public ClusterService getClusterService() {
        return clusterServiceRef.get();
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the "Elastic License
# 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
# Public License v 1"; you may not use this file except in compliance with, at
# your election, the "Elastic License 2.0", the "GNU Affero General Public
# License v3.0 only", or the "Server Side Public License, v 1".
#

org.elasticsearch.index.shard.IndexShardIT$BogusNodeUsageStatsForThreadPoolsCollector
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_EMBEDDING_TYPE = def(9_118_0_00);
public static final TransportVersion ESQL_FIXED_INDEX_LIKE = def(9_119_0_00);
public static final TransportVersion LOOKUP_JOIN_CCS = def(9_120_0_00);
public static final TransportVersion NODE_USAGE_STATS_FOR_THREAD_POOLS_IN_CLUSTER_INFO = def(9_121_0_00);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
40 changes: 34 additions & 6 deletions server/src/main/java/org/elasticsearch/cluster/ClusterInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ public class ClusterInfo implements ChunkedToXContent, Writeable {
final Map<NodeAndShard, String> dataPath;
final Map<NodeAndPath, ReservedSpace> reservedSpace;
final Map<String, EstimatedHeapUsage> estimatedHeapUsages;
final Map<String, NodeUsageStatsForThreadPools> nodeUsageStatsForThreadPools;

protected ClusterInfo() {
this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of());
this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of());
}

/**
Expand All @@ -73,6 +74,7 @@ protected ClusterInfo() {
* @param dataPath the shard routing to datapath mapping
* @param reservedSpace reserved space per shard broken down by node and data path
* @param estimatedHeapUsages estimated heap usage broken down by node
* @param nodeUsageStatsForThreadPools node-level usage stats (operational load) broken down by node
* @see #shardIdentifierFromRouting
*/
public ClusterInfo(
Expand All @@ -82,7 +84,8 @@ public ClusterInfo(
Map<ShardId, Long> shardDataSetSizes,
Map<NodeAndShard, String> dataPath,
Map<NodeAndPath, ReservedSpace> reservedSpace,
Map<String, EstimatedHeapUsage> estimatedHeapUsages
Map<String, EstimatedHeapUsage> estimatedHeapUsages,
Map<String, NodeUsageStatsForThreadPools> nodeUsageStatsForThreadPools
) {
this.leastAvailableSpaceUsage = Map.copyOf(leastAvailableSpaceUsage);
this.mostAvailableSpaceUsage = Map.copyOf(mostAvailableSpaceUsage);
Expand All @@ -91,6 +94,7 @@ public ClusterInfo(
this.dataPath = Map.copyOf(dataPath);
this.reservedSpace = Map.copyOf(reservedSpace);
this.estimatedHeapUsages = Map.copyOf(estimatedHeapUsages);
this.nodeUsageStatsForThreadPools = Map.copyOf(nodeUsageStatsForThreadPools);
}

public ClusterInfo(StreamInput in) throws IOException {
Expand All @@ -107,6 +111,11 @@ public ClusterInfo(StreamInput in) throws IOException {
} else {
this.estimatedHeapUsages = Map.of();
}
if (in.getTransportVersion().onOrAfter(TransportVersions.NODE_USAGE_STATS_FOR_THREAD_POOLS_IN_CLUSTER_INFO)) {
this.nodeUsageStatsForThreadPools = in.readImmutableMap(NodeUsageStatsForThreadPools::new);
} else {
this.nodeUsageStatsForThreadPools = Map.of();
}
}

@Override
Expand All @@ -124,6 +133,9 @@ public void writeTo(StreamOutput out) throws IOException {
if (out.getTransportVersion().onOrAfter(TransportVersions.HEAP_USAGE_IN_CLUSTER_INFO)) {
out.writeMap(this.estimatedHeapUsages, StreamOutput::writeWriteable);
}
if (out.getTransportVersion().onOrAfter(TransportVersions.NODE_USAGE_STATS_FOR_THREAD_POOLS_IN_CLUSTER_INFO)) {
out.writeMap(this.nodeUsageStatsForThreadPools, StreamOutput::writeWriteable);
}
}

/**
Expand Down Expand Up @@ -204,8 +216,8 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
return builder.endObject(); // NodeAndPath
}),
endArray() // end "reserved_sizes"
// NOTE: We don't serialize estimatedHeapUsages at this stage, to avoid
// committing to API payloads until the feature is settled
// NOTE: We don't serialize estimatedHeapUsages/nodeUsageStatsForThreadPools at this stage, to avoid
// committing to API payloads until the features are settled
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not entirely sure what the toXContentChunked is used for, but figured I should follow suit.

);
}

Expand All @@ -220,6 +232,13 @@ public Map<String, EstimatedHeapUsage> getEstimatedHeapUsages() {
return estimatedHeapUsages;
}

/**
 * Returns a map containing thread pool usage stats for each node, keyed by node ID.
 * The map is empty when no stats have been collected (e.g. collection is disabled, or the
 * instance was deserialized from a node on an older transport version).
 */
public Map<String, NodeUsageStatsForThreadPools> getNodeUsageStatsForThreadPools() {
return nodeUsageStatsForThreadPools;
}

/**
* Returns a node id to disk usage mapping for the path that has the least available space on the node.
* Note that this does not take account of reserved space: there may be another path with less available _and unreserved_ space.
Expand Down Expand Up @@ -311,12 +330,21 @@ public boolean equals(Object o) {
&& shardSizes.equals(that.shardSizes)
&& shardDataSetSizes.equals(that.shardDataSetSizes)
&& dataPath.equals(that.dataPath)
&& reservedSpace.equals(that.reservedSpace);
&& reservedSpace.equals(that.reservedSpace)
&& nodeUsageStatsForThreadPools.equals(that.nodeUsageStatsForThreadPools);
}

@Override
public int hashCode() {
return Objects.hash(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, shardDataSetSizes, dataPath, reservedSpace);
return Objects.hash(
leastAvailableSpaceUsage,
mostAvailableSpaceUsage,
shardSizes,
shardDataSetSizes,
dataPath,
reservedSpace,
nodeUsageStatsForThreadPools
);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class ClusterInfoSimulator {
private final Map<ShardId, Long> shardDataSetSizes;
private final Map<NodeAndShard, String> dataPath;
private final Map<String, EstimatedHeapUsage> estimatedHeapUsages;
private final Map<String, NodeUsageStatsForThreadPools> nodeThreadPoolUsageStats;

public ClusterInfoSimulator(RoutingAllocation allocation) {
this.allocation = allocation;
Expand All @@ -43,6 +44,7 @@ public ClusterInfoSimulator(RoutingAllocation allocation) {
this.shardDataSetSizes = Map.copyOf(allocation.clusterInfo().shardDataSetSizes);
this.dataPath = Map.copyOf(allocation.clusterInfo().dataPath);
this.estimatedHeapUsages = allocation.clusterInfo().getEstimatedHeapUsages();
this.nodeThreadPoolUsageStats = allocation.clusterInfo().getNodeUsageStatsForThreadPools();
}

/**
Expand Down Expand Up @@ -156,7 +158,8 @@ public ClusterInfo getClusterInfo() {
shardDataSetSizes,
dataPath,
Map.of(),
estimatedHeapUsages
estimatedHeapUsages,
nodeThreadPoolUsageStats
);
}
}
Loading