-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Metrics to account for time spent waiting for next chunk #129469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
8763cd5
6f97068
160aa68
ef7afa4
6472d42
dc16d64
6a6c27a
c6a60b7
6799c5e
f8ca2a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -22,6 +22,7 @@ | |||||
| import org.elasticsearch.core.Releasables; | ||||||
| import org.elasticsearch.core.TimeValue; | ||||||
| import org.elasticsearch.index.IndexingPressure; | ||||||
| import org.elasticsearch.rest.action.document.BulkOperationWaitForChunkMetrics; | ||||||
|
|
||||||
| import java.util.ArrayList; | ||||||
| import java.util.Collections; | ||||||
|
|
@@ -43,10 +44,16 @@ public class IncrementalBulkService { | |||||
| private final Client client; | ||||||
| private final AtomicBoolean enabledForTests = new AtomicBoolean(true); | ||||||
| private final IndexingPressure indexingPressure; | ||||||
| private final BulkOperationWaitForChunkMetrics bulkOperationWaitForChunkMetrics; | ||||||
|
|
||||||
| public IncrementalBulkService(Client client, IndexingPressure indexingPressure) { | ||||||
| public IncrementalBulkService( | ||||||
| Client client, | ||||||
| IndexingPressure indexingPressure, | ||||||
| BulkOperationWaitForChunkMetrics bulkOperationWaitForChunkMetrics | ||||||
| ) { | ||||||
| this.client = client; | ||||||
| this.indexingPressure = indexingPressure; | ||||||
| this.bulkOperationWaitForChunkMetrics = bulkOperationWaitForChunkMetrics; | ||||||
| } | ||||||
|
|
||||||
| public Handler newBulkRequest() { | ||||||
|
|
@@ -56,7 +63,7 @@ public Handler newBulkRequest() { | |||||
|
|
||||||
| public Handler newBulkRequest(@Nullable String waitForActiveShards, @Nullable TimeValue timeout, @Nullable String refresh) { | ||||||
| ensureEnabled(); | ||||||
| return new Handler(client, indexingPressure, waitForActiveShards, timeout, refresh); | ||||||
| return new Handler(client, indexingPressure, waitForActiveShards, timeout, refresh, bulkOperationWaitForChunkMetrics); | ||||||
| } | ||||||
|
|
||||||
| private void ensureEnabled() { | ||||||
|
|
@@ -105,26 +112,35 @@ public static class Handler implements Releasable { | |||||
| private boolean bulkInProgress = false; | ||||||
| private Exception bulkActionLevelFailure = null; | ||||||
| private BulkRequest bulkRequest = null; | ||||||
| private final BulkOperationWaitForChunkMetrics bulkOperationWaitForChunkMetrics; | ||||||
|
||||||
|
|
||||||
| protected Handler( | ||||||
| Client client, | ||||||
| IndexingPressure indexingPressure, | ||||||
| @Nullable String waitForActiveShards, | ||||||
| @Nullable TimeValue timeout, | ||||||
| @Nullable String refresh | ||||||
| @Nullable String refresh, | ||||||
| @Nullable BulkOperationWaitForChunkMetrics bulkOperationWaitForChunkMetrics | ||||||
| ) { | ||||||
| this.client = client; | ||||||
| this.waitForActiveShards = waitForActiveShards != null ? ActiveShardCount.parseString(waitForActiveShards) : null; | ||||||
| this.timeout = timeout; | ||||||
| this.refresh = refresh; | ||||||
| this.incrementalOperation = indexingPressure.startIncrementalCoordinating(0, 0, false); | ||||||
| this.bulkOperationWaitForChunkMetrics = bulkOperationWaitForChunkMetrics; | ||||||
| createNewBulkRequest(EMPTY_STATE); | ||||||
| } | ||||||
|
|
||||||
| public IndexingPressure.Incremental getIncrementalOperation() { | ||||||
| return incrementalOperation; | ||||||
| } | ||||||
|
|
||||||
| public void updateWaitForChunkMetrics(long chunkWaitTimeCentis) { | ||||||
|
||||||
| public void updateWaitForChunkMetrics(long chunkWaitTimeCentis) { | |
| public void recordWaitForNextChunkTime(long waitForNextChunkTimeInMillis) { |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we can assert that it is not null instead here?
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -195,6 +195,7 @@ | |
| import org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthIndicatorService; | ||
| import org.elasticsearch.reservedstate.service.FileSettingsService.FileSettingsHealthTracker; | ||
| import org.elasticsearch.reservedstate.service.FileSettingsServiceProvider; | ||
| import org.elasticsearch.rest.action.document.BulkOperationWaitForChunkMetrics; | ||
| import org.elasticsearch.rest.action.search.SearchResponseMetrics; | ||
| import org.elasticsearch.script.ScriptModule; | ||
| import org.elasticsearch.script.ScriptService; | ||
|
|
@@ -940,6 +941,9 @@ public Map<String, String> queryFields() { | |
| ); | ||
|
|
||
| final IndexingPressure indexingLimits = new IndexingPressure(settings); | ||
| final BulkOperationWaitForChunkMetrics bulkOperationWaitForChunkMetrics = new BulkOperationWaitForChunkMetrics( | ||
| telemetryProvider.getMeterRegistry() | ||
| ); | ||
|
|
||
| PluginServiceInstances pluginServices = new PluginServiceInstances( | ||
| client, | ||
|
|
@@ -997,7 +1001,11 @@ public Map<String, String> queryFields() { | |
| .map(TerminationHandlerProvider::handler); | ||
| terminationHandler = getSinglePlugin(terminationHandlers, TerminationHandler.class).orElse(null); | ||
|
|
||
| final IncrementalBulkService incrementalBulkService = new IncrementalBulkService(client, indexingLimits); | ||
| final IncrementalBulkService incrementalBulkService = new IncrementalBulkService( | ||
| client, | ||
| indexingLimits, | ||
| bulkOperationWaitForChunkMetrics | ||
|
||
| ); | ||
|
|
||
| final ResponseCollectorService responseCollectorService = new ResponseCollectorService(clusterService); | ||
| modules.bindToInstance(ResponseCollectorService.class, responseCollectorService); | ||
|
|
@@ -1253,6 +1261,7 @@ public Map<String, String> queryFields() { | |
| b.bind(PageCacheRecycler.class).toInstance(pageCacheRecycler); | ||
| b.bind(IngestService.class).toInstance(ingestService); | ||
| b.bind(IndexingPressure.class).toInstance(indexingLimits); | ||
| b.bind(BulkOperationWaitForChunkMetrics.class).toInstance(bulkOperationWaitForChunkMetrics); | ||
|
||
| b.bind(IncrementalBulkService.class).toInstance(incrementalBulkService); | ||
| b.bind(AggregationUsageService.class).toInstance(searchModule.getValuesSourceRegistry().getUsageService()); | ||
| b.bind(MetaStateService.class).toInstance(metaStateService); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,39 @@ | ||||||
| /* | ||||||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||||||
| * or more contributor license agreements. Licensed under the "Elastic License | ||||||
| * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side | ||||||
| * Public License v 1"; you may not use this file except in compliance with, at | ||||||
| * your election, the "Elastic License 2.0", the "GNU Affero General Public | ||||||
| * License v3.0 only", or the "Server Side Public License, v 1". | ||||||
| */ | ||||||
|
|
||||||
| package org.elasticsearch.rest.action.document; | ||||||
|
|
||||||
| import org.elasticsearch.telemetry.metric.LongHistogram; | ||||||
| import org.elasticsearch.telemetry.metric.MeterRegistry; | ||||||
|
|
||||||
| public class BulkOperationWaitForChunkMetrics { | ||||||
| public static final String CHUNK_WAIT_TIME_HISTOGRAM = "es.rest.wait.duration.histogram"; | ||||||
|
||||||
|
|
||||||
| /* Capture in milliseconds because the APM histogram only has a range of 100,000 */ | ||||||
| private final LongHistogram chunkWaitTimeMillisHistogram; | ||||||
|
||||||
| private final LongHistogram chunkWaitTimeMillisHistogram; | |
| private final LongHistogram chunkWaitTimeInMillisHistogram; |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| "centis" | |
| "ms" |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -41,6 +41,7 @@ | |||||
| import java.util.ArrayList; | ||||||
| import java.util.List; | ||||||
| import java.util.Set; | ||||||
| import java.util.concurrent.TimeUnit; | ||||||
| import java.util.function.Supplier; | ||||||
|
|
||||||
| import static org.elasticsearch.rest.RestRequest.Method.POST; | ||||||
|
|
@@ -157,6 +158,9 @@ static class ChunkHandler implements BaseRestHandler.RequestBodyChunkConsumer { | |||||
| private final ArrayDeque<ReleasableBytesReference> unParsedChunks = new ArrayDeque<>(4); | ||||||
| private final ArrayList<DocWriteRequest<?>> items = new ArrayList<>(4); | ||||||
|
|
||||||
| private long requestNextChunkTime; | ||||||
| private long totalChunkWaitTime = 0L; | ||||||
|
|
||||||
| ChunkHandler(boolean allowExplicitIndex, RestRequest request, Supplier<IncrementalBulkService.Handler> handlerSupplier) { | ||||||
| this.request = request; | ||||||
| this.handlerSupplier = handlerSupplier; | ||||||
|
|
@@ -182,12 +186,18 @@ public void accept(RestChannel restChannel) { | |||||
| this.restChannel = restChannel; | ||||||
| this.handler = handlerSupplier.get(); | ||||||
| request.contentStream().next(); | ||||||
| requestNextChunkTime = System.nanoTime(); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We often pass a LongSupplier instead of calling the real time-measurement method directly; it makes testing easier. |
||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| public void handleChunk(RestChannel channel, ReleasableBytesReference chunk, boolean isLast) { | ||||||
| assert handler != null; | ||||||
| assert channel == restChannel; | ||||||
| long elapsedTime = System.nanoTime() - requestNextChunkTime; | ||||||
| if (elapsedTime > 0) { | ||||||
| totalChunkWaitTime += elapsedTime; | ||||||
| requestNextChunkTime = 0L; | ||||||
|
||||||
| } | ||||||
| if (shortCircuited) { | ||||||
| chunk.close(); | ||||||
| return; | ||||||
|
|
@@ -231,13 +241,20 @@ public void handleChunk(RestChannel channel, ReleasableBytesReference chunk, boo | |||||
| items.clear(); | ||||||
| handler.lastItems(toPass, () -> Releasables.close(releasables), new RestRefCountedChunkedToXContentListener<>(channel)); | ||||||
| } | ||||||
| totalChunkWaitTime = TimeUnit.NANOSECONDS.toMillis(totalChunkWaitTime); | ||||||
| handler.updateWaitForChunkMetrics(totalChunkWaitTime); | ||||||
| totalChunkWaitTime = 0L; | ||||||
|
||||||
| totalChunkWaitTime = 0L; | |
| totalChunkWaitTime = -1L; |
and then assert `totalChunkWaitTime >= 0L` in the handleChunk method?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know if request.contentStream().next(); immediately calls handleChunk if data is available? Otherwise I wonder if we want to capture the time before calling next.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should do it prior, seems more correct regardless.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also set this before calling next here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder what the advantages are of using a dedicated
`BulkOperationWaitForChunkMetrics` object here? Maybe just injecting the `MeterRegistry` and declaring the histogram metric in IncrementalBulkService would be simpler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Later on, calling
`updateWaitForChunkMetrics` would then also update the metric directly instead of delegating to BulkOperationWaitForChunkMetrics.