Skip to content

Commit 7d43d8a

Browse files
authored
Expose operation and request counts separately in repository stats (#117530)
Relates ES-9767 Fixes #104443
1 parent 447c4dc commit 7d43d8a

File tree

21 files changed

+498
-211
lines changed

21 files changed

+498
-211
lines changed

docs/changelog/117530.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 117530
2+
summary: Expose operation and request counts separately in repository stats
3+
area: Snapshot/Restore
4+
type: enhancement
5+
issues:
6+
- 104443

modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryMetricsTests.java

Lines changed: 13 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
package org.elasticsearch.repositories.azure;
1111

12-
import com.sun.net.httpserver.Headers;
1312
import com.sun.net.httpserver.HttpExchange;
1413
import com.sun.net.httpserver.HttpHandler;
1514

@@ -34,7 +33,6 @@
3433

3534
import java.io.IOException;
3635
import java.nio.ByteBuffer;
37-
import java.nio.charset.StandardCharsets;
3836
import java.util.ArrayList;
3937
import java.util.List;
4038
import java.util.Map;
@@ -48,6 +46,7 @@
4846
import java.util.stream.IntStream;
4947

5048
import static org.elasticsearch.repositories.azure.AbstractAzureServerTestCase.randomBlobContent;
49+
import static org.elasticsearch.repositories.azure.ResponseInjectingAzureHttpHandler.createFailNRequestsHandler;
5150
import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose;
5251
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
5352
import static org.hamcrest.Matchers.hasSize;
@@ -61,7 +60,7 @@ public class AzureBlobStoreRepositoryMetricsTests extends AzureBlobStoreReposito
6160
);
6261
private static final int MAX_RETRIES = 3;
6362

64-
private final Queue<RequestHandler> requestHandlers = new ConcurrentLinkedQueue<>();
63+
private final Queue<ResponseInjectingAzureHttpHandler.RequestHandler> requestHandlers = new ConcurrentLinkedQueue<>();
6564

6665
@Override
6766
protected Map<String, HttpHandler> createHttpHandlers() {
@@ -106,7 +105,8 @@ public void testThrottleResponsesAreCountedInMetrics() throws IOException {
106105

107106
// Queue up some throttle responses
108107
final int numThrottles = randomIntBetween(1, MAX_RETRIES);
109-
IntStream.range(0, numThrottles).forEach(i -> requestHandlers.offer(new FixedRequestHandler(RestStatus.TOO_MANY_REQUESTS)));
108+
IntStream.range(0, numThrottles)
109+
.forEach(i -> requestHandlers.offer(new ResponseInjectingAzureHttpHandler.FixedRequestHandler(RestStatus.TOO_MANY_REQUESTS)));
110110

111111
// Check that the blob exists
112112
blobContainer.blobExists(purpose, blobName);
@@ -131,7 +131,13 @@ public void testRangeNotSatisfiedAreCountedInMetrics() throws IOException {
131131
clearMetrics(dataNodeName);
132132

133133
// Queue up a range-not-satisfied error
134-
requestHandlers.offer(new FixedRequestHandler(RestStatus.REQUESTED_RANGE_NOT_SATISFIED, null, GET_BLOB_REQUEST_PREDICATE));
134+
requestHandlers.offer(
135+
new ResponseInjectingAzureHttpHandler.FixedRequestHandler(
136+
RestStatus.REQUESTED_RANGE_NOT_SATISFIED,
137+
null,
138+
GET_BLOB_REQUEST_PREDICATE
139+
)
140+
);
135141

136142
// Attempt to read the blob
137143
assertThrows(RequestedRangeNotSatisfiedException.class, () -> blobContainer.readBlob(purpose, blobName));
@@ -163,7 +169,7 @@ public void testErrorResponsesAreCountedInMetrics() throws IOException {
163169
if (status == RestStatus.TOO_MANY_REQUESTS) {
164170
throttles.incrementAndGet();
165171
}
166-
requestHandlers.offer(new FixedRequestHandler(status));
172+
requestHandlers.offer(new ResponseInjectingAzureHttpHandler.FixedRequestHandler(status));
167173
});
168174

169175
// Check that the blob exists
@@ -259,7 +265,7 @@ public void testBatchDeleteFailure() throws IOException {
259265
clearMetrics(dataNodeName);
260266

261267
// Handler will fail one or more of the batch requests
262-
final RequestHandler failNRequestRequestHandler = createFailNRequestsHandler(failedBatches);
268+
final ResponseInjectingAzureHttpHandler.RequestHandler failNRequestRequestHandler = createFailNRequestsHandler(failedBatches);
263269

264270
// Exhaust the retries
265271
IntStream.range(0, (numberOfBatches - failedBatches) + (failedBatches * (MAX_RETRIES + 1)))
@@ -287,35 +293,6 @@ private long getLongCounterTotal(String dataNodeName, String metricKey) {
287293
.reduce(0L, Long::sum);
288294
}
289295

290-
/**
291-
* Creates a {@link RequestHandler} that will persistently fail the first <code>numberToFail</code> distinct requests
292-
* it sees. Any other requests are passed through to the delegate.
293-
*
294-
* @param numberToFail The number of requests to fail
295-
* @return the handler
296-
*/
297-
private static RequestHandler createFailNRequestsHandler(int numberToFail) {
298-
final List<String> requestsToFail = new ArrayList<>(numberToFail);
299-
return (exchange, delegate) -> {
300-
final Headers requestHeaders = exchange.getRequestHeaders();
301-
final String requestId = requestHeaders.get("X-ms-client-request-id").get(0);
302-
boolean failRequest = false;
303-
synchronized (requestsToFail) {
304-
if (requestsToFail.contains(requestId)) {
305-
failRequest = true;
306-
} else if (requestsToFail.size() < numberToFail) {
307-
requestsToFail.add(requestId);
308-
failRequest = true;
309-
}
310-
}
311-
if (failRequest) {
312-
exchange.sendResponseHeaders(500, -1);
313-
} else {
314-
delegate.handle(exchange);
315-
}
316-
};
317-
}
318-
319296
private void clearMetrics(String discoveryNode) {
320297
internalCluster().getInstance(PluginsService.class, discoveryNode)
321298
.filterPlugins(TestTelemetryPlugin.class)
@@ -480,80 +457,4 @@ private void assertMatchingMetricRecorded(MetricType metricType, String metricNa
480457
assertion.accept(measurement);
481458
}
482459
}
483-
484-
@SuppressForbidden(reason = "we use a HttpServer to emulate Azure")
485-
private static class ResponseInjectingAzureHttpHandler implements DelegatingHttpHandler {
486-
487-
private final HttpHandler delegate;
488-
private final Queue<RequestHandler> requestHandlerQueue;
489-
490-
ResponseInjectingAzureHttpHandler(Queue<RequestHandler> requestHandlerQueue, HttpHandler delegate) {
491-
this.delegate = delegate;
492-
this.requestHandlerQueue = requestHandlerQueue;
493-
}
494-
495-
@Override
496-
public void handle(HttpExchange exchange) throws IOException {
497-
RequestHandler nextHandler = requestHandlerQueue.peek();
498-
if (nextHandler != null && nextHandler.matchesRequest(exchange)) {
499-
requestHandlerQueue.poll().writeResponse(exchange, delegate);
500-
} else {
501-
delegate.handle(exchange);
502-
}
503-
}
504-
505-
@Override
506-
public HttpHandler getDelegate() {
507-
return delegate;
508-
}
509-
}
510-
511-
@SuppressForbidden(reason = "we use a HttpServer to emulate Azure")
512-
@FunctionalInterface
513-
private interface RequestHandler {
514-
void writeResponse(HttpExchange exchange, HttpHandler delegate) throws IOException;
515-
516-
default boolean matchesRequest(HttpExchange exchange) {
517-
return true;
518-
}
519-
}
520-
521-
@SuppressForbidden(reason = "we use a HttpServer to emulate Azure")
522-
private static class FixedRequestHandler implements RequestHandler {
523-
524-
private final RestStatus status;
525-
private final String responseBody;
526-
private final Predicate<HttpExchange> requestMatcher;
527-
528-
FixedRequestHandler(RestStatus status) {
529-
this(status, null, req -> true);
530-
}
531-
532-
/**
533-
* Create a handler that only gets executed for requests that match the supplied predicate. Note
534-
* that because the errors are stored in a queue this will prevent any subsequently queued errors from
535-
* being returned until after it returns.
536-
*/
537-
FixedRequestHandler(RestStatus status, String responseBody, Predicate<HttpExchange> requestMatcher) {
538-
this.status = status;
539-
this.responseBody = responseBody;
540-
this.requestMatcher = requestMatcher;
541-
}
542-
543-
@Override
544-
public boolean matchesRequest(HttpExchange exchange) {
545-
return requestMatcher.test(exchange);
546-
}
547-
548-
@Override
549-
public void writeResponse(HttpExchange exchange, HttpHandler delegateHandler) throws IOException {
550-
if (responseBody != null) {
551-
byte[] responseBytes = responseBody.getBytes(StandardCharsets.UTF_8);
552-
exchange.sendResponseHeaders(status.getStatus(), responseBytes.length);
553-
exchange.getResponseBody().write(responseBytes);
554-
} else {
555-
exchange.sendResponseHeaders(status.getStatus(), -1);
556-
}
557-
}
558-
}
559460
}

modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import org.elasticsearch.common.settings.Settings;
3131
import org.elasticsearch.common.unit.ByteSizeUnit;
3232
import org.elasticsearch.common.unit.ByteSizeValue;
33-
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
3433
import org.elasticsearch.core.SuppressForbidden;
3534
import org.elasticsearch.plugins.Plugin;
3635
import org.elasticsearch.plugins.PluginsService;
@@ -54,13 +53,11 @@
5453
import java.util.HashMap;
5554
import java.util.List;
5655
import java.util.Map;
57-
import java.util.Set;
58-
import java.util.concurrent.atomic.LongAdder;
5956
import java.util.function.Predicate;
6057
import java.util.regex.Pattern;
6158
import java.util.stream.Collectors;
6259

63-
import static org.elasticsearch.repositories.RepositoriesMetrics.METRIC_OPERATIONS_TOTAL;
60+
import static org.elasticsearch.repositories.RepositoriesMetrics.METRIC_REQUESTS_TOTAL;
6461
import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose;
6562
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
6663
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
@@ -230,21 +227,13 @@ protected String requestUniqueId(final HttpExchange exchange) {
230227
*/
231228
@SuppressForbidden(reason = "this test uses a HttpServer to emulate an Azure endpoint")
232229
private static class AzureHTTPStatsCollectorHandler extends HttpStatsCollectorHandler {
233-
private final Set<String> seenRequestIds = ConcurrentCollections.newConcurrentSet();
234230

235231
private AzureHTTPStatsCollectorHandler(HttpHandler delegate) {
236232
super(delegate);
237233
}
238234

239235
@Override
240236
protected void maybeTrack(String request, Headers headers) {
241-
// Same request id is a retry
242-
// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-ncnbi/817da997-30d2-4cd3-972f-a0073e4e98f7
243-
// Do not count retries since the client side request stats do not track them yet.
244-
// See https://github.com/elastic/elasticsearch/issues/104443
245-
if (false == seenRequestIds.add(headers.getFirst("X-ms-client-request-id"))) {
246-
return;
247-
}
248237
if (GET_BLOB_PATTERN.test(request)) {
249238
trackRequest("GetBlob");
250239
} else if (Regex.simpleMatch("HEAD /*/*/*", request)) {
@@ -393,14 +382,14 @@ public void testMetrics() throws Exception {
393382
}
394383

395384
final AzureBlobStore blobStore = (AzureBlobStore) blobStoreRepository.blobStore();
396-
final Map<AzureBlobStore.StatsKey, LongAdder> statsCollectors = blobStore.getMetricsRecorder().opsCounters;
385+
final Map<AzureBlobStore.StatsKey, AzureBlobStore.StatsCounter> statsCounters = blobStore.getMetricsRecorder().statsCounters;
397386

398387
final List<Measurement> metrics = Measurement.combine(
399-
getTelemetryPlugin(nodeName).getLongCounterMeasurement(METRIC_OPERATIONS_TOTAL)
388+
getTelemetryPlugin(nodeName).getLongCounterMeasurement(METRIC_REQUESTS_TOTAL)
400389
);
401390

402391
assertThat(
403-
statsCollectors.keySet().stream().map(AzureBlobStore.StatsKey::operation).collect(Collectors.toSet()),
392+
statsCounters.keySet().stream().map(AzureBlobStore.StatsKey::operation).collect(Collectors.toSet()),
404393
equalTo(
405394
metrics.stream()
406395
.map(m -> AzureBlobStore.Operation.fromKey((String) m.attributes().get("operation")))
@@ -417,8 +406,12 @@ public void testMetrics() throws Exception {
417406
operation,
418407
OperationPurpose.parse((String) metric.attributes().get("purpose"))
419408
);
420-
assertThat(nodeName + "/" + statsKey + " exists", statsCollectors, hasKey(statsKey));
421-
assertThat(nodeName + "/" + statsKey + " has correct sum", metric.getLong(), equalTo(statsCollectors.get(statsKey).sum()));
409+
assertThat(nodeName + "/" + statsKey + " exists", statsCounters, hasKey(statsKey));
410+
assertThat(
411+
nodeName + "/" + statsKey + " has correct sum",
412+
metric.getLong(),
413+
equalTo(statsCounters.get(statsKey).requests().sum())
414+
);
422415
aggregatedMetrics.compute(statsKey.operation(), (k, v) -> v == null ? metric.getLong() : v + metric.getLong());
423416
});
424417
}

modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.elasticsearch.common.blobstore.BlobContainer;
5353
import org.elasticsearch.common.blobstore.BlobPath;
5454
import org.elasticsearch.common.blobstore.BlobStore;
55+
import org.elasticsearch.common.blobstore.BlobStoreActionStats;
5556
import org.elasticsearch.common.blobstore.DeleteResult;
5657
import org.elasticsearch.common.blobstore.OperationPurpose;
5758
import org.elasticsearch.common.blobstore.OptionalBytesReference;
@@ -695,7 +696,7 @@ private AzureBlobServiceClient getAzureBlobServiceClientClient(OperationPurpose
695696
}
696697

697698
@Override
698-
public Map<String, Long> stats() {
699+
public Map<String, BlobStoreActionStats> stats() {
699700
return requestMetricsRecorder.statsMap(service.isStateless());
700701
}
701702

@@ -737,27 +738,39 @@ public String toString() {
737738
}
738739
}
739740

741+
// visible for testing
742+
record StatsCounter(LongAdder operations, LongAdder requests) {
743+
StatsCounter() {
744+
this(new LongAdder(), new LongAdder());
745+
}
746+
747+
BlobStoreActionStats getEndpointStats() {
748+
return new BlobStoreActionStats(operations.sum(), requests.sum());
749+
}
750+
}
751+
740752
// visible for testing
741753
class RequestMetricsRecorder {
742754
private final RepositoriesMetrics repositoriesMetrics;
743-
final Map<StatsKey, LongAdder> opsCounters = new ConcurrentHashMap<>();
755+
final Map<StatsKey, StatsCounter> statsCounters = new ConcurrentHashMap<>();
744756
final Map<StatsKey, Map<String, Object>> opsAttributes = new ConcurrentHashMap<>();
745757

746758
RequestMetricsRecorder(RepositoriesMetrics repositoriesMetrics) {
747759
this.repositoriesMetrics = repositoriesMetrics;
748760
}
749761

750-
Map<String, Long> statsMap(boolean stateless) {
762+
Map<String, BlobStoreActionStats> statsMap(boolean stateless) {
751763
if (stateless) {
752-
return opsCounters.entrySet()
764+
return statsCounters.entrySet()
753765
.stream()
754-
.collect(Collectors.toUnmodifiableMap(e -> e.getKey().toString(), e -> e.getValue().sum()));
766+
.collect(Collectors.toUnmodifiableMap(e -> e.getKey().toString(), e -> e.getValue().getEndpointStats()));
755767
} else {
756-
Map<String, Long> normalisedStats = Arrays.stream(Operation.values()).collect(Collectors.toMap(Operation::getKey, o -> 0L));
757-
opsCounters.forEach(
768+
Map<String, BlobStoreActionStats> normalisedStats = Arrays.stream(Operation.values())
769+
.collect(Collectors.toMap(Operation::getKey, o -> BlobStoreActionStats.ZERO));
770+
statsCounters.forEach(
758771
(key, value) -> normalisedStats.compute(
759772
key.operation.getKey(),
760-
(k, current) -> Objects.requireNonNull(current) + value.sum()
773+
(k, current) -> value.getEndpointStats().add(Objects.requireNonNull(current))
761774
)
762775
);
763776
return Map.copyOf(normalisedStats);
@@ -766,13 +779,14 @@ Map<String, Long> statsMap(boolean stateless) {
766779

767780
public void onRequestComplete(Operation operation, OperationPurpose purpose, AzureClientProvider.RequestMetrics requestMetrics) {
768781
final StatsKey statsKey = new StatsKey(operation, purpose);
769-
final LongAdder counter = opsCounters.computeIfAbsent(statsKey, k -> new LongAdder());
782+
final StatsCounter counter = statsCounters.computeIfAbsent(statsKey, k -> new StatsCounter());
770783
final Map<String, Object> attributes = opsAttributes.computeIfAbsent(
771784
statsKey,
772785
k -> RepositoriesMetrics.createAttributesMap(repositoryMetadata, purpose, operation.getKey())
773786
);
774787

775-
counter.add(1);
788+
counter.operations.increment();
789+
counter.requests.add(requestMetrics.getRequestCount());
776790

777791
// range not satisfied is not retried, so we count them by checking the final response
778792
if (requestMetrics.getStatusCode() == RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus()) {

0 commit comments

Comments
 (0)