From 838d6a77af4ffe13744c2f342e00328f6a9dc1eb Mon Sep 17 00:00:00 2001 From: Ines Potier Date: Wed, 18 Mar 2026 14:52:43 -0500 Subject: [PATCH 1/6] Add metrics tracking shard time from unassigned to initialized/started state Extends ShardChangesObserver to emit two LongHistogram metrics that track how long a shard takes to go from UNASSIGNED to INITIALIZED to STARTED. Relates to ES-14351 --- .../apm/internal/MetricValidator.java | 2 + .../elasticsearch/cluster/ClusterModule.java | 3 +- .../routing/allocation/AllocationService.java | 23 +++++- .../routing/allocation/RoutingAllocation.java | 26 ++++--- .../allocation/ShardChangesObserver.java | 47 +++++++++++ .../allocation/ShardChangesObserverTests.java | 77 +++++++++++++++++++ .../ThrottlingAllocationDeciderTests.java | 6 +- 7 files changed, 168 insertions(+), 16 deletions(-) diff --git a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java index 9c121b8062055..86a5d0a56896a 100644 --- a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java +++ b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java @@ -171,6 +171,8 @@ private static class Attributes { Map.entry("es.allocator.desired_balance.allocations.node_weight.current", ALLOCATOR_NODE_ATTRIBUTES), Map.entry("es.allocator.desired_balance.allocations.node_write_load.current", ALLOCATOR_NODE_ATTRIBUTES), Map.entry("es.allocator.shard_write_load.distribution.current", Sets.addToCopy(ALLOCATOR_NODE_ATTRIBUTES, "percentile")), + Map.entry("es.allocator.shards.unassigned_to_initializing.duration.histogram", Set.of("primary", "reason")), + Map.entry("es.allocator.shards.unassigned_to_started.duration.histogram", Set.of("primary", "reason")), Map.entry("es.autoscaling.indexing.node_ingest_load.current", Sets.addToCopy(ALLOCATOR_NODE_ATTRIBUTES, "quality", "type")), Map.entry("es.blob_cache.miss_that_triggered_read.total", BLOB_CACHE_ATTRIBUTES), Map.entry("es.blob_cache.population.bytes.total", BLOB_CACHE_ATTRIBUTES), diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 37074fcf4a2dd..15c28c468d44e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -197,7 +197,8 @@ public ClusterModule( shardsAllocator, clusterInfoService, snapshotsInfoService, - shardRoutingRoleStrategy + shardRoutingRoleStrategy, + telemetryProvider.getMeterRegistry() ); this.allocationService.addAllocFailuresResetListenerTo(clusterService); this.metadataDeleteIndexService = new MetadataDeleteIndexService(settings, clusterService, allocationService); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index 96d8a1623cecf..e21ec96b73e96 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -57,6 +57,7 @@ import org.elasticsearch.index.Index; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.snapshots.SnapshotsInfoService; +import org.elasticsearch.telemetry.metric.MeterRegistry; import java.util.ArrayList; import java.util.Collections; @@ -90,6 +91,7 @@ public class AllocationService { private final ClusterInfoService clusterInfoService; private final SnapshotsInfoService snapshotsInfoService; private final ShardRoutingRoleStrategy shardRoutingRoleStrategy; + private final ShardChangesObserver shardChangesObserver; // only for tests that use the GatewayAllocator as the unique ExistingShardsAllocator @SuppressWarnings("this-escape") @@ -117,6 +119,23 @@ public AllocationService( this.clusterInfoService = clusterInfoService; this.snapshotsInfoService = snapshotsInfoService; this.shardRoutingRoleStrategy = shardRoutingRoleStrategy; + this.shardChangesObserver = ShardChangesObserver.NOOP; + } + + public AllocationService( + AllocationDeciders allocationDeciders, + ShardsAllocator shardsAllocator, + ClusterInfoService clusterInfoService, + SnapshotsInfoService snapshotsInfoService, + ShardRoutingRoleStrategy shardRoutingRoleStrategy, + MeterRegistry meterRegistry + ) { + this.allocationDeciders = allocationDeciders; + this.shardsAllocator = shardsAllocator; + this.clusterInfoService = clusterInfoService; + this.snapshotsInfoService = snapshotsInfoService; + this.shardRoutingRoleStrategy = shardRoutingRoleStrategy; + this.shardChangesObserver = new ShardChangesObserver(meterRegistry); } /** @@ -769,7 +788,9 @@ private RoutingAllocation createRoutingAllocation(ClusterState clusterState, lon clusterState, clusterInfoService.getClusterInfo(), snapshotsInfoService.snapshotShardSizes(), - currentNanoTime + currentNanoTime, + false, + shardChangesObserver ); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index a4a2f3212e7e1..2e24701fbb17f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -112,17 +112,17 @@ public RoutingAllocation( SnapshotShardSizeInfo shardSizeInfo, long currentNanoTime ) { - this(deciders, routingNodes, clusterState, clusterInfo, shardSizeInfo, currentNanoTime, false); + this(deciders, routingNodes, clusterState, clusterInfo, shardSizeInfo, currentNanoTime, false, ShardChangesObserver.NOOP); } - /** - * Creates a new {@link RoutingAllocation} - * @param deciders {@link AllocationDeciders} to used to make decisions for routing allocations - * @param routingNodes Routing nodes in the current cluster or {@code null} if using those in the given cluster state - * @param clusterState cluster state before rerouting - * @param currentNanoTime the nano time to use for all delay allocation calculation (typically {@link System#nanoTime()}) - * @param isSimulating {@code true} if "transient" deciders should be ignored because we are simulating the final allocation - */ + /// Creates a new [RoutingAllocation] + /// + /// @param deciders [AllocationDeciders] to used to make decisions for routing allocations + /// @param routingNodes Routing nodes in the current cluster or `null` if using those in the given cluster state + /// @param clusterState cluster state before rerouting + /// @param currentNanoTime the nano time to use for all delay allocation calculation (typically `System#nanoTime()`) + /// @param isSimulating `true` if "transient" deciders should be ignored because we are simulating the final allocation + /// @param shardChangesObserver observer that records shard state transition timing metrics public RoutingAllocation( AllocationDeciders deciders, @Nullable RoutingNodes routingNodes, @@ -130,7 +130,8 @@ public RoutingAllocation( ClusterInfo clusterInfo, SnapshotShardSizeInfo shardSizeInfo, long currentNanoTime, - boolean isSimulating + boolean isSimulating, + ShardChangesObserver shardChangesObserver ) { this.deciders = deciders; this.routingNodes = routingNodes; @@ -154,7 +155,7 @@ public RoutingAllocation( indexMetadataUpdater, restoreInProgressUpdater, resizeSourceIndexUpdater, - new ShardChangesObserver() } + shardChangesObserver } ); } @@ -465,7 +466,8 @@ public RoutingAllocation mutableCloneForSimulation() { clusterInfo, shardSizeInfo, currentNanoTime, - true + true, + ShardChangesObserver.NOOP ); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java index 7869e595bb7ab..965036797f557 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java @@ -14,11 +14,44 @@ import org.elasticsearch.cluster.routing.RoutingChangesObserver; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.telemetry.metric.LongHistogram; +import org.elasticsearch.telemetry.metric.MeterRegistry; +import java.util.Map; +import java.util.function.LongSupplier; + +/// Observes shard state transitions during allocation rounds, logging them and emitting APM timing metrics. public class ShardChangesObserver implements RoutingChangesObserver { + public static final String UNASSIGNED_TO_INITIALIZING_METRIC = "es.allocator.shards.unassigned_to_initializing.duration.histogram"; + public static final String UNASSIGNED_TO_STARTED_METRIC = "es.allocator.shards.unassigned_to_started.duration.histogram"; + + public static final ShardChangesObserver NOOP = new ShardChangesObserver(MeterRegistry.NOOP); + private static final Logger logger = LogManager.getLogger(ShardChangesObserver.class); + private final LongHistogram unassignedToInitializingDuration; + private final LongHistogram unassignedToStartedDuration; + private final LongSupplier currentTimeMillisSupplier; + + public ShardChangesObserver(MeterRegistry meterRegistry) { + this(meterRegistry, System::currentTimeMillis); + } + + ShardChangesObserver(MeterRegistry meterRegistry, LongSupplier currentTimeMillisSupplier) { + this.unassignedToInitializingDuration = meterRegistry.registerLongHistogram( + UNASSIGNED_TO_INITIALIZING_METRIC, + "Duration a shard spent in UNASSIGNED state before being assigned to a node", + "ms" + ); + this.unassignedToStartedDuration = meterRegistry.registerLongHistogram( + UNASSIGNED_TO_STARTED_METRIC, + "Total duration from when a shard became UNASSIGNED to when it became STARTED", + "ms" + ); + this.currentTimeMillisSupplier = currentTimeMillisSupplier; + } + @Override public void shardInitialized(ShardRouting unassignedShard, ShardRouting initializedShard) { logger.trace( @@ -27,6 +60,11 @@ public void shardInitialized(ShardRouting unassignedShard, ShardRouting initiali initializedShard.recoverySource().getType(), initializedShard.currentNodeId() ); + UnassignedInfo info = unassignedShard.unassignedInfo(); + if (info != null) { + long durationMillis = currentTimeMillisSupplier.getAsLong() - info.unassignedTimeMillis(); + unassignedToInitializingDuration.record(Math.max(0, durationMillis), attributes(info, initializedShard)); + } } @Override @@ -37,6 +75,11 @@ public void shardStarted(ShardRouting initializingShard, ShardRouting startedSha initializingShard.recoverySource().getType(), startedShard.currentNodeId() ); + UnassignedInfo info = initializingShard.unassignedInfo(); + if (info != null) { + long durationMillis = currentTimeMillisSupplier.getAsLong() - info.unassignedTimeMillis(); + unassignedToStartedDuration.record(Math.max(0, durationMillis), attributes(info, startedShard)); + } } @Override @@ -63,4 +106,8 @@ public void replicaPromoted(ShardRouting replicaShard) { private static String shardIdentifier(ShardRouting shardRouting) { return shardRouting.shardId().toString() + '[' + (shardRouting.primary() ? 'P' : 'R') + ']'; } + + private static Map attributes(UnassignedInfo info, ShardRouting shard) { + return Map.of("primary", shard.primary(), "reason", info.reason().name()); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java index 76bad33f83091..764e4e504be09 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java @@ -23,15 +23,25 @@ import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.telemetry.InstrumentType; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.RecordingMeterRegistry; import org.elasticsearch.test.MockLog; import org.elasticsearch.test.junit.annotations.TestLogging; +import java.util.Collections; +import java.util.List; import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import static org.elasticsearch.cluster.routing.TestShardRouting.shardRoutingBuilder; import static org.elasticsearch.test.MockLog.assertThatLogger; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; @TestLogging(value = "org.elasticsearch.cluster.routing.allocation.ShardChangesObserver:TRACE", reason = "verifies debug level logging") public class ShardChangesObserverTests extends ESAllocationTestCase { @@ -143,4 +153,71 @@ public void testLogShardFailureAndPromotion() { ) ); } + + public void testUnassignedMetrics() { + final var meterRegistry = new RecordingMeterRegistry(); + final long unassignedAtMillis = randomLongBetween(0, System.currentTimeMillis() + TimeUnit.HOURS.toMillis(1)); + final AtomicLong nowMillis = new AtomicLong(unassignedAtMillis); + final var observer = new ShardChangesObserver(meterRegistry, nowMillis::get); + + final var reason = randomFrom(UnassignedInfo.Reason.values()); + final var unassignedInfo = new UnassignedInfo( + reason, + null, + null, + 0, + System.nanoTime(), + unassignedAtMillis, + false, + UnassignedInfo.AllocationStatus.NO_ATTEMPT, + Collections.emptySet(), + null + ); + final var shardId = new ShardId("test-index", "_na_", 0); + final var primary = randomBoolean(); + final var unassignedShard = shardRoutingBuilder(shardId, null, primary, ShardRoutingState.UNASSIGNED).withUnassignedInfo( + unassignedInfo + ).build(); + + final var recorder = meterRegistry.getRecorder(); + assertThat( + recorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, ShardChangesObserver.UNASSIGNED_TO_INITIALIZING_METRIC).size(), + equalTo(0) + ); + assertThat( + recorder.getMeasurements(InstrumentType.LONG_HISTOGRAM, ShardChangesObserver.UNASSIGNED_TO_STARTED_METRIC).size(), + equalTo(0) + ); + + final var initializedShard = shardRoutingBuilder(shardId, "node-1", primary, ShardRoutingState.INITIALIZING).withRecoverySource( + RecoverySource.EmptyStoreRecoverySource.INSTANCE + ).withUnassignedInfo(unassignedInfo).build(); + final long initializedTimeMillis = System.currentTimeMillis(); + nowMillis.set(initializedTimeMillis); + observer.shardInitialized(unassignedShard, initializedShard); + + final var startedShard = shardRoutingBuilder(shardId, "node-1", primary, ShardRoutingState.STARTED).build(); + final long startedTimeMillis = initializedTimeMillis + randomLongBetween(0, 1000L); + nowMillis.set(startedTimeMillis); + observer.shardStarted(initializedShard, startedShard); + + final List initializedMetrics = recorder.getMeasurements( + InstrumentType.LONG_HISTOGRAM, + ShardChangesObserver.UNASSIGNED_TO_INITIALIZING_METRIC + ); + assertThat(initializedMetrics, hasSize(1)); + final var initializedMetricValue = initializedMetrics.getFirst(); + assertThat(initializedMetricValue.getLong(), equalTo(Math.max(0, initializedTimeMillis - unassignedAtMillis))); + assertThat(initializedMetricValue.attributes().get("primary"), equalTo(primary)); + assertThat(initializedMetricValue.attributes().get("reason"), equalTo(reason.name())); + + final List startedMetrics = recorder.getMeasurements( + InstrumentType.LONG_HISTOGRAM, + ShardChangesObserver.UNASSIGNED_TO_STARTED_METRIC + ); + final var startedMetricValue = startedMetrics.getFirst(); + assertThat(startedMetricValue.getLong(), equalTo(Math.max(0, startedTimeMillis - unassignedAtMillis))); + assertThat(startedMetricValue.attributes().get("primary"), equalTo(primary)); + assertThat(startedMetricValue.attributes().get("reason"), equalTo(reason.name())); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java index 663962e644f6a..3391c552ae6ce 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java @@ -115,7 +115,8 @@ public void testPrimaryAndReplicaThrottlingNotSimulation() { ClusterInfo.builder().build(), null, System.nanoTime(), - false // Turn off isSimulating + false, // Turn off isSimulating + ShardChangesObserver.NOOP ); Settings settings = Settings.builder() @@ -189,7 +190,8 @@ public void testPrimaryAndReplicaThrottlingInSimulation() { ClusterInfo.builder().build(), null, System.nanoTime(), - true // Turn on isSimulating + true, // Turn on isSimulating + ShardChangesObserver.NOOP ); Settings settings = Settings.builder() From f0ddd6e093291ba64ed1fdea4fb148b73cb020a3 Mon Sep 17 00:00:00 2001 From: Ines Potier Date: Wed, 18 Mar 2026 16:32:51 -0500 Subject: [PATCH 2/6] Small fixes and optimizations --- .../apm/internal/MetricValidator.java | 4 ++-- .../routing/allocation/AllocationService.java | 17 +------------- .../allocation/ShardChangesObserver.java | 23 ++++++++++++++++++- .../allocation/AllocationServiceTests.java | 10 +++++--- .../allocation/ShardChangesObserverTests.java | 21 +++++++++++++---- .../DesiredBalanceReconcilerTests.java | 3 ++- ...TransportGetShutdownStatusActionTests.java | 4 +++- 7 files changed, 53 insertions(+), 29 deletions(-) diff --git a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java index 86a5d0a56896a..2f1b9ba1a1b18 100644 --- a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java +++ b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java @@ -171,8 +171,8 @@ private static class Attributes { Map.entry("es.allocator.desired_balance.allocations.node_weight.current", ALLOCATOR_NODE_ATTRIBUTES), Map.entry("es.allocator.desired_balance.allocations.node_write_load.current", ALLOCATOR_NODE_ATTRIBUTES), Map.entry("es.allocator.shard_write_load.distribution.current", Sets.addToCopy(ALLOCATOR_NODE_ATTRIBUTES, "percentile")), - Map.entry("es.allocator.shards.unassigned_to_initializing.duration.histogram", Set.of("primary", "reason")), - Map.entry("es.allocator.shards.unassigned_to_started.duration.histogram", Set.of("primary", "reason")), + Map.entry("es.allocator.shards.unassigned_to_initializing.duration.histogram", Set.of("primary", "reason", "delayed")), + Map.entry("es.allocator.shards.unassigned_to_started.duration.histogram", Set.of("primary", "reason", "delayed")), Map.entry("es.autoscaling.indexing.node_ingest_load.current", Sets.addToCopy(ALLOCATOR_NODE_ATTRIBUTES, "quality", "type")), Map.entry("es.blob_cache.miss_that_triggered_read.total", BLOB_CACHE_ATTRIBUTES), Map.entry("es.blob_cache.population.bytes.total", BLOB_CACHE_ATTRIBUTES), diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index e21ec96b73e96..b5366cad8f79f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -103,25 +103,10 @@ public AllocationService( SnapshotsInfoService snapshotsInfoService, ShardRoutingRoleStrategy shardRoutingRoleStrategy ) { - this(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService, shardRoutingRoleStrategy); + this(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService, shardRoutingRoleStrategy, MeterRegistry.NOOP); setExistingShardsAllocators(Collections.singletonMap(GatewayAllocator.ALLOCATOR_NAME, gatewayAllocator)); } - public AllocationService( - AllocationDeciders allocationDeciders, - ShardsAllocator shardsAllocator, - ClusterInfoService clusterInfoService, - SnapshotsInfoService snapshotsInfoService, - ShardRoutingRoleStrategy shardRoutingRoleStrategy - ) { - this.allocationDeciders = allocationDeciders; - this.shardsAllocator = shardsAllocator; - this.clusterInfoService = clusterInfoService; - this.snapshotsInfoService = snapshotsInfoService; - this.shardRoutingRoleStrategy = shardRoutingRoleStrategy; - this.shardChangesObserver = ShardChangesObserver.NOOP; - } - public AllocationService( AllocationDeciders allocationDeciders, ShardsAllocator shardsAllocator, diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java index 965036797f557..2dd77eb0ff401 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java @@ -17,8 +17,10 @@ import org.elasticsearch.telemetry.metric.LongHistogram; import org.elasticsearch.telemetry.metric.MeterRegistry; +import java.util.Arrays; import java.util.Map; import java.util.function.LongSupplier; +import java.util.stream.Collectors; /// Observes shard state transitions during allocation rounds, logging them and emitting APM timing metrics. public class ShardChangesObserver implements RoutingChangesObserver { @@ -26,6 +28,24 @@ public class ShardChangesObserver implements RoutingChangesObserver { public static final String UNASSIGNED_TO_INITIALIZING_METRIC = "es.allocator.shards.unassigned_to_initializing.duration.histogram"; public static final String UNASSIGNED_TO_STARTED_METRIC = "es.allocator.shards.unassigned_to_started.duration.histogram"; + private static final Map>> PRIMARY_ATTRIBUTES = Map.of( + false, + buildAttributesByReason(true, false), + true, + buildAttributesByReason(true, true) + ); + private static final Map>> REPLICA_ATTRIBUTES = Map.of( + false, + buildAttributesByReason(false, false), + true, + buildAttributesByReason(false, true) + ); + + private static Map> buildAttributesByReason(boolean primary, boolean delayed) { + return Arrays.stream(UnassignedInfo.Reason.values()) + .collect(Collectors.toUnmodifiableMap(r -> r, r -> Map.of("primary", primary, "reason", r.name(), "delayed", delayed))); + } + public static final ShardChangesObserver NOOP = new ShardChangesObserver(MeterRegistry.NOOP); private static final Logger logger = LogManager.getLogger(ShardChangesObserver.class); @@ -75,6 +95,7 @@ public void shardStarted(ShardRouting initializingShard, ShardRouting startedSha initializingShard.recoverySource().getType(), startedShard.currentNodeId() ); + // Relocation target shards have no unassignedInfo UnassignedInfo info = initializingShard.unassignedInfo(); if (info != null) { long durationMillis = currentTimeMillisSupplier.getAsLong() - info.unassignedTimeMillis(); @@ -108,6 +129,6 @@ private static String shardIdentifier(ShardRouting shardRouting) { } private static Map attributes(UnassignedInfo info, ShardRouting shard) { - return Map.of("primary", shard.primary(), "reason", info.reason().name()); + return (shard.primary() ? PRIMARY_ATTRIBUTES : REPLICA_ATTRIBUTES).get(info.delayed()).get(info.reason()); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationServiceTests.java index a86502e87344a..071aafe008714 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationServiceTests.java @@ -44,6 +44,7 @@ import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.snapshots.EmptySnapshotsInfoService; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.gateway.TestGatewayAllocator; @@ -154,7 +155,8 @@ public ShardAllocationDecision explainShardAllocation(ShardRouting shard, Routin }, new EmptyClusterInfoService(), EmptySnapshotsInfoService.INSTANCE, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY + TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY, + MeterRegistry.NOOP ); final String unrealisticAllocatorName = "unrealistic"; @@ -268,7 +270,8 @@ public void testExplainsNonAllocationOfShardWithUnknownAllocator() { null, null, null, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY + TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY, + MeterRegistry.NOOP ); allocationService.setExistingShardsAllocators( Collections.singletonMap(GatewayAllocator.ALLOCATOR_NAME, new TestGatewayAllocator()) @@ -390,7 +393,8 @@ public void testAutoExpandReplicas() throws Exception { null, new EmptyClusterInfoService(), EmptySnapshotsInfoService.INSTANCE, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY + TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY, + MeterRegistry.NOOP ); final ProjectId project1 = randomUniqueProjectId(); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java index 764e4e504be09..16192b9aa92c3 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java @@ -161,17 +161,23 @@ public void testUnassignedMetrics() { final var observer = new ShardChangesObserver(meterRegistry, nowMillis::get); final var reason = randomFrom(UnassignedInfo.Reason.values()); + // delayed=true is only valid for NODE_LEFT and NODE_RESTARTING + final boolean delayed = (reason == UnassignedInfo.Reason.NODE_LEFT || reason == UnassignedInfo.Reason.NODE_RESTARTING) + && randomBoolean(); + // ALLOCATION_FAILED needs failedAllocations > 0; NODE_RESTARTING needs a lastAllocatedNodeId + final int failedAllocations = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? randomIntBetween(1, 5) : 0; + final String lastAllocatedNodeId = reason == UnassignedInfo.Reason.NODE_RESTARTING ? randomIdentifier() : null; final var unassignedInfo = new UnassignedInfo( reason, null, null, - 0, + failedAllocations, System.nanoTime(), unassignedAtMillis, - false, - UnassignedInfo.AllocationStatus.NO_ATTEMPT, + delayed, + delayed ? UnassignedInfo.AllocationStatus.DELAYED_ALLOCATION : UnassignedInfo.AllocationStatus.NO_ATTEMPT, Collections.emptySet(), - null + lastAllocatedNodeId ); final var shardId = new ShardId("test-index", "_na_", 0); final var primary = randomBoolean(); @@ -189,8 +195,10 @@ public void testUnassignedMetrics() { equalTo(0) ); + // replica shards must recover from primary + final var recoverySource = primary ? RecoverySource.EmptyStoreRecoverySource.INSTANCE : RecoverySource.PeerRecoverySource.INSTANCE; final var initializedShard = shardRoutingBuilder(shardId, "node-1", primary, ShardRoutingState.INITIALIZING).withRecoverySource( - RecoverySource.EmptyStoreRecoverySource.INSTANCE + recoverySource ).withUnassignedInfo(unassignedInfo).build(); final long initializedTimeMillis = System.currentTimeMillis(); nowMillis.set(initializedTimeMillis); @@ -210,14 +218,17 @@ public void testUnassignedMetrics() { assertThat(initializedMetricValue.getLong(), equalTo(Math.max(0, initializedTimeMillis - unassignedAtMillis))); assertThat(initializedMetricValue.attributes().get("primary"), equalTo(primary)); assertThat(initializedMetricValue.attributes().get("reason"), equalTo(reason.name())); + assertThat(initializedMetricValue.attributes().get("delayed"), equalTo(delayed)); final List startedMetrics = recorder.getMeasurements( InstrumentType.LONG_HISTOGRAM, ShardChangesObserver.UNASSIGNED_TO_STARTED_METRIC ); + assertThat(startedMetrics, hasSize(1)); final var startedMetricValue = startedMetrics.getFirst(); assertThat(startedMetricValue.getLong(), equalTo(Math.max(0, startedTimeMillis - unassignedAtMillis))); assertThat(startedMetricValue.attributes().get("primary"), equalTo(primary)); assertThat(startedMetricValue.attributes().get("reason"), equalTo(reason.name())); + assertThat(startedMetricValue.attributes().get("delayed"), equalTo(delayed)); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java index ddb706e37593b..03c5f132c8c80 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerTests.java @@ -71,6 +71,7 @@ import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotShardSizeInfo; import org.elasticsearch.snapshots.SnapshotsInfoService; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.MockLog; import org.junit.BeforeClass; @@ -1691,7 +1692,7 @@ public void allocate(RoutingAllocation allocation) { public ShardAllocationDecision explainShardAllocation(ShardRouting shard, RoutingAllocation allocation) { throw new AssertionError("should not be called"); } - }, clusterInfoService, snapshotsInfoService, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY); + }, clusterInfoService, snapshotsInfoService, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY, MeterRegistry.NOOP); allocationService.setExistingShardsAllocators(Map.of(GatewayAllocator.ALLOCATOR_NAME, new NoOpExistingShardsAllocator())); return allocationService; } diff --git a/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java b/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java index c601fbd9e4346..91775d516ceba 100644 --- a/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java +++ b/x-pack/plugin/shutdown/src/test/java/org/elasticsearch/xpack/shutdown/TransportGetShutdownStatusActionTests.java @@ -57,6 +57,7 @@ import org.elasticsearch.tasks.TaskCancelHelper; import org.elasticsearch.tasks.TaskCancelledException; import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.gateway.TestGatewayAllocator; import org.elasticsearch.xpack.core.ilm.ErrorStep; @@ -156,7 +157,8 @@ public Decision canRebalance(RoutingAllocation allocation) { new BalancedShardsAllocator(Settings.EMPTY), clusterInfoService, snapshotsInfoService, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY + TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY, + MeterRegistry.NOOP ); allocationService.setExistingShardsAllocators(Map.of(GatewayAllocator.ALLOCATOR_NAME, new TestGatewayAllocator())); } From 30d8f6b27b2c3926c98b83d92bc48f6442564d67 Mon Sep 17 00:00:00 2001 From: Ines Potier Date: Wed, 18 Mar 2026 19:21:23 -0500 Subject: [PATCH 3/6] Don't bypass MetricValidator + style changes --- .../telemetry/apm/internal/MetricValidator.java | 2 -- .../routing/allocation/RoutingAllocation.java | 11 +++++++---- .../routing/allocation/ShardChangesObserver.java | 13 ++++++++----- .../allocation/ShardChangesObserverTests.java | 12 ++++++------ .../ThrottlingAllocationDeciderTests.java | 4 ++-- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java index 2f1b9ba1a1b18..9c121b8062055 100644 --- a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java +++ b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/MetricValidator.java @@ -171,8 +171,6 @@ private static class Attributes { Map.entry("es.allocator.desired_balance.allocations.node_weight.current", ALLOCATOR_NODE_ATTRIBUTES), Map.entry("es.allocator.desired_balance.allocations.node_write_load.current", ALLOCATOR_NODE_ATTRIBUTES), Map.entry("es.allocator.shard_write_load.distribution.current", Sets.addToCopy(ALLOCATOR_NODE_ATTRIBUTES, "percentile")), - Map.entry("es.allocator.shards.unassigned_to_initializing.duration.histogram", Set.of("primary", "reason", "delayed")), - Map.entry("es.allocator.shards.unassigned_to_started.duration.histogram", Set.of("primary", "reason", "delayed")), Map.entry("es.autoscaling.indexing.node_ingest_load.current", Sets.addToCopy(ALLOCATOR_NODE_ATTRIBUTES, "quality", "type")), Map.entry("es.blob_cache.miss_that_triggered_read.total", BLOB_CACHE_ATTRIBUTES), Map.entry("es.blob_cache.population.bytes.total", BLOB_CACHE_ATTRIBUTES), diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 2e24701fbb17f..9f730e83250d6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -112,17 +112,20 @@ public RoutingAllocation( SnapshotShardSizeInfo shardSizeInfo, long currentNanoTime ) { - this(deciders, routingNodes, clusterState, clusterInfo, shardSizeInfo, currentNanoTime, false, ShardChangesObserver.NOOP); + this(deciders, routingNodes, clusterState, clusterInfo, shardSizeInfo, currentNanoTime, false, RoutingChangesObserver.NOOP); } /// Creates a new [RoutingAllocation] /// /// @param deciders [AllocationDeciders] to used to make decisions for routing allocations - /// @param routingNodes Routing nodes in the current cluster or `null` if using those in the given cluster state + /// @param routingNodes routing nodes in the current cluster or `null` if using those in the given cluster state /// @param clusterState cluster state before rerouting + /// @param clusterInfo information about node disk usage and shard disk usage + /// @param shardSizeInfo information about snapshot shard sizes /// @param currentNanoTime the nano time to use for all delay allocation calculation (typically `System#nanoTime()`) /// @param isSimulating `true` if "transient" deciders should be ignored because we are simulating the final allocation /// @param shardChangesObserver observer that records shard state transition timing metrics + /// public RoutingAllocation( AllocationDeciders deciders, @Nullable RoutingNodes routingNodes, @@ -131,7 +134,7 @@ public RoutingAllocation( SnapshotShardSizeInfo shardSizeInfo, long currentNanoTime, boolean isSimulating, - ShardChangesObserver shardChangesObserver + RoutingChangesObserver shardChangesObserver ) { this.deciders = deciders; this.routingNodes = routingNodes; @@ -467,7 +470,7 @@ public RoutingAllocation mutableCloneForSimulation() { shardSizeInfo, currentNanoTime, true, - ShardChangesObserver.NOOP + RoutingChangesObserver.NOOP ); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java index 2dd77eb0ff401..27e3c0bbbec22 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java @@ -23,7 +23,7 @@ import java.util.stream.Collectors; /// Observes shard state transitions during allocation rounds, logging them and emitting APM timing metrics. -public class ShardChangesObserver implements RoutingChangesObserver { +class ShardChangesObserver implements RoutingChangesObserver { public static final String UNASSIGNED_TO_INITIALIZING_METRIC = "es.allocator.shards.unassigned_to_initializing.duration.histogram"; public static final String UNASSIGNED_TO_STARTED_METRIC = "es.allocator.shards.unassigned_to_started.duration.histogram"; @@ -43,18 +43,21 @@ public class ShardChangesObserver implements RoutingChangesObserver { private static Map> buildAttributesByReason(boolean primary, boolean delayed) { return Arrays.stream(UnassignedInfo.Reason.values()) - .collect(Collectors.toUnmodifiableMap(r -> r, r -> Map.of("primary", primary, "reason", r.name(), "delayed", delayed))); + .collect( + Collectors.toUnmodifiableMap( + r -> r, + r -> Map.of("es_shard_primary", primary, "es_shard_reason", r.name(), "es_shard_delayed", delayed) + ) + ); } - public static final ShardChangesObserver NOOP = new ShardChangesObserver(MeterRegistry.NOOP); - private static final Logger logger = LogManager.getLogger(ShardChangesObserver.class); private final LongHistogram unassignedToInitializingDuration; private final LongHistogram unassignedToStartedDuration; private final LongSupplier currentTimeMillisSupplier; - public ShardChangesObserver(MeterRegistry meterRegistry) { + ShardChangesObserver(MeterRegistry meterRegistry) { this(meterRegistry, System::currentTimeMillis); } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java index 16192b9aa92c3..a363e53c31f45 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java @@ -216,9 +216,9 @@ public void testUnassignedMetrics() { assertThat(initializedMetrics, hasSize(1)); final var initializedMetricValue = initializedMetrics.getFirst(); assertThat(initializedMetricValue.getLong(), equalTo(Math.max(0, initializedTimeMillis - unassignedAtMillis))); - assertThat(initializedMetricValue.attributes().get("primary"), equalTo(primary)); - assertThat(initializedMetricValue.attributes().get("reason"), equalTo(reason.name())); - assertThat(initializedMetricValue.attributes().get("delayed"), equalTo(delayed)); + assertThat(initializedMetricValue.attributes().get("es_shard_primary"), equalTo(primary)); + assertThat(initializedMetricValue.attributes().get("es_shard_reason"), equalTo(reason.name())); + assertThat(initializedMetricValue.attributes().get("es_shard_delayed"), equalTo(delayed)); final List startedMetrics = recorder.getMeasurements( InstrumentType.LONG_HISTOGRAM, @@ -227,8 +227,8 @@ public void testUnassignedMetrics() { assertThat(startedMetrics, hasSize(1)); final var startedMetricValue = startedMetrics.getFirst(); assertThat(startedMetricValue.getLong(), equalTo(Math.max(0, startedTimeMillis - unassignedAtMillis))); - assertThat(startedMetricValue.attributes().get("primary"), equalTo(primary)); - assertThat(startedMetricValue.attributes().get("reason"), equalTo(reason.name())); - assertThat(startedMetricValue.attributes().get("delayed"), equalTo(delayed)); + assertThat(startedMetricValue.attributes().get("es_shard_primary"), equalTo(primary)); + assertThat(startedMetricValue.attributes().get("es_shard_reason"), equalTo(reason.name())); + assertThat(startedMetricValue.attributes().get("es_shard_delayed"), equalTo(delayed)); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java index 3391c552ae6ce..2969c8f0c9d31 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationDeciderTests.java @@ -116,7 +116,7 @@ public void testPrimaryAndReplicaThrottlingNotSimulation() { null, System.nanoTime(), false, // Turn off isSimulating - ShardChangesObserver.NOOP + RoutingChangesObserver.NOOP ); Settings settings = Settings.builder() @@ -191,7 +191,7 @@ public void testPrimaryAndReplicaThrottlingInSimulation() { null, System.nanoTime(), true, // Turn on isSimulating - ShardChangesObserver.NOOP + RoutingChangesObserver.NOOP ); Settings settings = Settings.builder() From dd22525aa381979139e3cf749b95eeec0df2a01a Mon Sep 17 00:00:00 2001 From: Ines Potier Date: Wed, 18 Mar 2026 21:28:15 -0500 Subject: [PATCH 4/6] Remove delayed attribute --- .../routing/allocation/RoutingAllocation.java | 20 ++++++------- .../allocation/ShardChangesObserver.java | 30 +++++-------------- .../allocation/ShardChangesObserverTests.java | 11 ++----- 3 files changed, 19 insertions(+), 42 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 9f730e83250d6..7d98202d6db4c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -95,15 +95,15 @@ public RoutingAllocation( this(deciders, null, clusterState, clusterInfo, shardSizeInfo, currentNanoTime); } - /** - * Creates a new {@link RoutingAllocation} - * @param deciders {@link AllocationDeciders} to used to make decisions for routing allocations - * @param routingNodes Routing nodes in the current cluster or {@code null} if using those in the given cluster state - * @param clusterState cluster state before rerouting - * @param clusterInfo information about node disk usage and shard disk usage - * @param shardSizeInfo information about snapshot shard sizes - * @param currentNanoTime the nano time to use for all delay allocation calculation (typically {@link System#nanoTime()}) - */ + /// Creates a new [RoutingAllocation] + /// + /// @param deciders [AllocationDeciders] to use to make decisions for routing allocations + /// @param routingNodes routing nodes in the current cluster or `null` if using those in the given cluster state + /// @param clusterState cluster state before rerouting + /// @param clusterInfo information about node disk usage and shard disk usage + /// @param shardSizeInfo information about snapshot shard sizes + /// @param currentNanoTime the nano time to use for all delay allocation calculation (typically `System#nanoTime()`) + /// public RoutingAllocation( AllocationDeciders deciders, @Nullable RoutingNodes routingNodes, @@ -117,7 +117,7 @@ public RoutingAllocation( /// Creates a new [RoutingAllocation] /// - /// @param deciders [AllocationDeciders] to used to make decisions for routing allocations + /// @param deciders [AllocationDeciders] to use to make decisions for routing allocations /// @param routingNodes routing nodes in the current cluster or `null` if using those in the given cluster state /// @param clusterState cluster state before rerouting /// @param clusterInfo information about node disk usage and shard disk usage diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java index 27e3c0bbbec22..0a4160694ba17 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java @@ -24,35 +24,19 @@ /// Observes shard state transitions during allocation rounds, logging them and emitting APM timing metrics. class ShardChangesObserver implements RoutingChangesObserver { + private static final Logger logger = LogManager.getLogger(ShardChangesObserver.class); public static final String UNASSIGNED_TO_INITIALIZING_METRIC = "es.allocator.shards.unassigned_to_initializing.duration.histogram"; public static final String UNASSIGNED_TO_STARTED_METRIC = "es.allocator.shards.unassigned_to_started.duration.histogram"; - private static final Map>> PRIMARY_ATTRIBUTES = Map.of( - false, - buildAttributesByReason(true, false), - true, - buildAttributesByReason(true, true) - ); - private static final Map>> REPLICA_ATTRIBUTES = Map.of( - false, - buildAttributesByReason(false, false), - true, - buildAttributesByReason(false, true) - ); - - private static Map> buildAttributesByReason(boolean primary, boolean delayed) { + private static final Map> PRIMARY_ATTRIBUTES = buildAttributesByReason(true); + private static final Map> REPLICA_ATTRIBUTES = buildAttributesByReason(false); + + private static Map> buildAttributesByReason(boolean primary) { return Arrays.stream(UnassignedInfo.Reason.values()) - .collect( - Collectors.toUnmodifiableMap( - r -> r, - r -> Map.of("es_shard_primary", primary, "es_shard_reason", r.name(), "es_shard_delayed", delayed) - ) - ); + .collect(Collectors.toUnmodifiableMap(r -> r, r -> Map.of("es_shard_primary", primary, "es_shard_reason", r.name()))); } - private static final Logger logger = LogManager.getLogger(ShardChangesObserver.class); - private final LongHistogram unassignedToInitializingDuration; private final LongHistogram unassignedToStartedDuration; private final LongSupplier currentTimeMillisSupplier; @@ -132,6 +116,6 @@ private static String shardIdentifier(ShardRouting shardRouting) { } private static Map attributes(UnassignedInfo info, ShardRouting shard) { - return (shard.primary() ? PRIMARY_ATTRIBUTES : REPLICA_ATTRIBUTES).get(info.delayed()).get(info.reason()); + return (shard.primary() ? PRIMARY_ATTRIBUTES : REPLICA_ATTRIBUTES).get(info.reason()); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java index a363e53c31f45..ab3a214096c9d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java @@ -47,7 +47,6 @@ public class ShardChangesObserverTests extends ESAllocationTestCase { public void testLogShardStarting() { - var indexName = randomIdentifier(); var indexMetadata = IndexMetadata.builder(indexName).settings(indexSettings(IndexVersion.current(), 1, 0)).build(); @@ -76,7 +75,6 @@ public void testLogShardStarting() { } public void testLogShardMovement() { - var allocationId = randomUUID(); var indexName = randomIdentifier(); var indexMetadata = IndexMetadata.builder(indexName) @@ -161,9 +159,6 @@ public void testUnassignedMetrics() { final var observer = new ShardChangesObserver(meterRegistry, nowMillis::get); final var reason = randomFrom(UnassignedInfo.Reason.values()); - // delayed=true is only valid for NODE_LEFT and NODE_RESTARTING - final boolean delayed = (reason == UnassignedInfo.Reason.NODE_LEFT || reason == UnassignedInfo.Reason.NODE_RESTARTING) - && randomBoolean(); // ALLOCATION_FAILED needs failedAllocations > 0; NODE_RESTARTING needs a lastAllocatedNodeId final int failedAllocations = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? randomIntBetween(1, 5) : 0; final String lastAllocatedNodeId = reason == UnassignedInfo.Reason.NODE_RESTARTING ? randomIdentifier() : null; @@ -174,8 +169,8 @@ public void testUnassignedMetrics() { failedAllocations, System.nanoTime(), unassignedAtMillis, - delayed, - delayed ? UnassignedInfo.AllocationStatus.DELAYED_ALLOCATION : UnassignedInfo.AllocationStatus.NO_ATTEMPT, + false, + UnassignedInfo.AllocationStatus.NO_ATTEMPT, Collections.emptySet(), lastAllocatedNodeId ); @@ -218,7 +213,6 @@ public void testUnassignedMetrics() { assertThat(initializedMetricValue.getLong(), equalTo(Math.max(0, initializedTimeMillis - unassignedAtMillis))); assertThat(initializedMetricValue.attributes().get("es_shard_primary"), equalTo(primary)); assertThat(initializedMetricValue.attributes().get("es_shard_reason"), equalTo(reason.name())); - assertThat(initializedMetricValue.attributes().get("es_shard_delayed"), equalTo(delayed)); final List startedMetrics = recorder.getMeasurements( InstrumentType.LONG_HISTOGRAM, @@ -229,6 +223,5 @@ public void testUnassignedMetrics() { assertThat(startedMetricValue.getLong(), equalTo(Math.max(0, startedTimeMillis - unassignedAtMillis))); assertThat(startedMetricValue.attributes().get("es_shard_primary"), equalTo(primary)); assertThat(startedMetricValue.attributes().get("es_shard_reason"), equalTo(reason.name())); - assertThat(startedMetricValue.attributes().get("es_shard_delayed"), equalTo(delayed)); } } From 0e4fa4102c86f8ff8d0a0fe343dbfb8aecaea418 Mon Sep 17 00:00:00 2001 From: Ines Potier Date: Wed, 18 Mar 2026 23:09:28 -0400 Subject: [PATCH 5/6] Style nits --- .../cluster/routing/allocation/ShardChangesObserver.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java index 0a4160694ba17..607c68579fae9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserver.java @@ -26,8 +26,8 @@ class ShardChangesObserver implements RoutingChangesObserver { private static final Logger logger = LogManager.getLogger(ShardChangesObserver.class); - public static final String UNASSIGNED_TO_INITIALIZING_METRIC = "es.allocator.shards.unassigned_to_initializing.duration.histogram"; - public static final String UNASSIGNED_TO_STARTED_METRIC = "es.allocator.shards.unassigned_to_started.duration.histogram"; + static final String UNASSIGNED_TO_INITIALIZING_METRIC = "es.allocator.shards.unassigned_to_initializing.duration.histogram"; + static final String UNASSIGNED_TO_STARTED_METRIC = "es.allocator.shards.unassigned_to_started.duration.histogram"; private static final Map> PRIMARY_ATTRIBUTES = buildAttributesByReason(true); private static final Map> REPLICA_ATTRIBUTES = buildAttributesByReason(false); From e9b1525ea0b669357534ee6dd601b5623568721c Mon Sep 17 00:00:00 2001 From: Ines Potier Date: Thu, 19 Mar 2026 00:23:58 -0400 Subject: [PATCH 6/6] Comment nits --- .../cluster/routing/allocation/ShardChangesObserverTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java index ab3a214096c9d..6f6b387150f72 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/ShardChangesObserverTests.java @@ -159,8 +159,9 @@ public void testUnassignedMetrics() { final var observer = new ShardChangesObserver(meterRegistry, nowMillis::get); final var reason = randomFrom(UnassignedInfo.Reason.values()); - // ALLOCATION_FAILED needs failedAllocations > 0; NODE_RESTARTING needs a lastAllocatedNodeId + // ALLOCATION_FAILED needs failedAllocations > 0 final int failedAllocations = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? randomIntBetween(1, 5) : 0; + // NODE_RESTARTING needs lastAllocatedNodeId final String lastAllocatedNodeId = reason == UnassignedInfo.Reason.NODE_RESTARTING ? randomIdentifier() : null; final var unassignedInfo = new UnassignedInfo( reason,