-
Notifications
You must be signed in to change notification settings - Fork 25.8k
Add metrics tracking shard time from unassigned to initialized/started #144521
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
838d6a7
f0ddd6e
30d8f6b
232a32d
dd22525
0e4fa41
547c8e6
e9b1525
32b30a8
60d84e7
65dbfcd
a6b4d2f
5048da9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,11 +14,51 @@ | |
| import org.elasticsearch.cluster.routing.RoutingChangesObserver; | ||
| import org.elasticsearch.cluster.routing.ShardRouting; | ||
| import org.elasticsearch.cluster.routing.UnassignedInfo; | ||
| import org.elasticsearch.telemetry.metric.LongHistogram; | ||
| import org.elasticsearch.telemetry.metric.MeterRegistry; | ||
|
|
||
| public class ShardChangesObserver implements RoutingChangesObserver { | ||
| import java.util.Arrays; | ||
| import java.util.Map; | ||
| import java.util.function.LongSupplier; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| /// Observes shard state transitions during allocation rounds, logging them and emitting APM timing metrics. | ||
| class ShardChangesObserver implements RoutingChangesObserver { | ||
| private static final Logger logger = LogManager.getLogger(ShardChangesObserver.class); | ||
|
|
||
| static final String UNASSIGNED_TO_INITIALIZING_METRIC = "es.allocator.shards.unassigned_to_initializing.duration.histogram"; | ||
| static final String UNASSIGNED_TO_STARTED_METRIC = "es.allocator.shards.unassigned_to_started.duration.histogram"; | ||
|
|
||
| private static final Map<UnassignedInfo.Reason, Map<String, Object>> PRIMARY_ATTRIBUTES = buildAttributesByReason(true); | ||
| private static final Map<UnassignedInfo.Reason, Map<String, Object>> REPLICA_ATTRIBUTES = buildAttributesByReason(false); | ||
|
|
||
| private static Map<UnassignedInfo.Reason, Map<String, Object>> buildAttributesByReason(boolean primary) { | ||
| return Arrays.stream(UnassignedInfo.Reason.values()) | ||
| .collect(Collectors.toUnmodifiableMap(r -> r, r -> Map.of("es_shard_primary", primary, "es_shard_reason", r.name()))); | ||
| } | ||
|
|
||
| private final LongHistogram unassignedToInitializingDuration; | ||
| private final LongHistogram unassignedToStartedDuration; | ||
| private final LongSupplier currentTimeMillisSupplier; | ||
|
|
||
| ShardChangesObserver(MeterRegistry meterRegistry) { | ||
| this(meterRegistry, System::currentTimeMillis); | ||
| } | ||
|
|
||
| ShardChangesObserver(MeterRegistry meterRegistry, LongSupplier currentTimeMillisSupplier) { | ||
| this.unassignedToInitializingDuration = meterRegistry.registerLongHistogram( | ||
| UNASSIGNED_TO_INITIALIZING_METRIC, | ||
| "Duration a shard spent in UNASSIGNED state before being assigned to a node", | ||
| "ms" | ||
| ); | ||
| this.unassignedToStartedDuration = meterRegistry.registerLongHistogram( | ||
| UNASSIGNED_TO_STARTED_METRIC, | ||
| "Total duration from when a shard became UNASSIGNED to when it became STARTED", | ||
| "ms" | ||
| ); | ||
| this.currentTimeMillisSupplier = currentTimeMillisSupplier; | ||
| } | ||
|
|
||
| @Override | ||
| public void shardInitialized(ShardRouting unassignedShard, ShardRouting initializedShard) { | ||
| logger.trace( | ||
|
|
@@ -27,6 +67,11 @@ public void shardInitialized(ShardRouting unassignedShard, ShardRouting initiali | |
| initializedShard.recoverySource().getType(), | ||
| initializedShard.currentNodeId() | ||
| ); | ||
| UnassignedInfo info = unassignedShard.unassignedInfo(); | ||
| if (info != null) { | ||
| long durationMillis = currentTimeMillisSupplier.getAsLong() - info.unassignedTimeMillis(); | ||
| unassignedToInitializingDuration.record(Math.max(0, durationMillis), attributes(info, initializedShard)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm curious about the need to floor this value at 0, is this just to accommodate slight clock skew between the nodes? Otherwise, if we are expecting to see missing unassigned times for some reason, is recording a 0 in that case going to dilute the histogram?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yep, that's the only reason! I went through the code and
Clock skew large enough to produce a negative value would be unusual in practice so not too worried about this. Considering that, I'd rather floor to 0 than skip the recording entirely. Otherwise we could silently be dropping actual data points would make the histogram's total count slightly undercount actual transitions, which seems worse than a rare artificial 0ms entry. Happy to reconsider if you feel strongly though!
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for explaining, agree that we should just round to zero if it's due to clock skew |
||
| } | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -37,6 +82,12 @@ public void shardStarted(ShardRouting initializingShard, ShardRouting startedSha | |
| initializingShard.recoverySource().getType(), | ||
| startedShard.currentNodeId() | ||
| ); | ||
| // Relocation target shards have no unassignedInfo | ||
| UnassignedInfo info = initializingShard.unassignedInfo(); | ||
| if (info != null) { | ||
| long durationMillis = currentTimeMillisSupplier.getAsLong() - info.unassignedTimeMillis(); | ||
| unassignedToStartedDuration.record(Math.max(0, durationMillis), attributes(info, startedShard)); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -63,4 +114,8 @@ public void replicaPromoted(ShardRouting replicaShard) { | |
| private static String shardIdentifier(ShardRouting shardRouting) { | ||
| return shardRouting.shardId().toString() + '[' + (shardRouting.primary() ? 'P' : 'R') + ']'; | ||
| } | ||
|
|
||
| private static Map<String, Object> attributes(UnassignedInfo info, ShardRouting shard) { | ||
| return (shard.primary() ? PRIMARY_ATTRIBUTES : REPLICA_ATTRIBUTES).get(info.reason()); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel like in the past I've had issues with attribute values being something other than a string. I think the
TestTelemetryProvidercopes with it but the real one doesn't. Please just verify that because it may have changed or I may have misremembered.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
r.name()is a String so that one should work.For the
es_shard_primaryboolean, I traced down the code, it seems that this should be handled properly:elasticsearch/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/metrics/OtelHelper.java
Lines 30 to 60 in 705a330
There are also other example paths in Elasticsearch using boolean attributes, e.g the
system_threadsetting in:elasticsearch/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchPhaseAPMMetrics.java
Lines 85 to 97 in 1d0f434
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great, thanks for double-checking!