# Review notes (annotation only; everything from "diff --git" onward is left unchanged).
#
# Purpose: this patch adds a counter metric named "es.blob_cache.epoch.total" to the blob cache.
#   - docs/changelog/132547.yaml: new changelog entry (pr 132547, area "Searchable Snapshots",
#     type "enhancement").
#   - BlobCacheMetrics.java: adds a LongCounter field `epochChanges`; registers it with
#     meterRegistry.registerLongCounter(...) using unit "count", right after the
#     "es.blob_cache.population.time.total" registration; appends it as a trailing parameter
#     after `LongCounter cachePopulationTime` and assigns it to this.epochChanges; adds
#     recordEpochChange(), which calls epochChanges.increment().
#   - SharedBlobCacheService.java: onAfter() calls blobCacheMetrics.recordEpochChange()
#     immediately after epoch.incrementAndGet().
#   - SharedBlobCacheServiceTests.java: testDecay and testMassiveDecay build BlobCacheMetrics
#     from a RecordingMeterRegistry instead of passing BlobCacheMetrics.NOOP, and assert that the
#     sum of the recorded "es.blob_cache.epoch.total" measurements equals cacheService.epoch().
#     The new helper recordedEpochs(...) computes that sum.
#
# NOTE(review): the hunk headers declare multi-line hunks (e.g. "@@ -0,0 +1,5 @@"), but the hunk
#   bodies here sit on a handful of physical lines; the original line breaks and indentation
#   would presumably have to be restored before a patch tool could apply this.
# NOTE(review): recordedEpochs(...) stores the stream sum in a local `epochs` only to return it in
#   the next statement; returning the expression directly would be equivalent.
# NOTE(review): asserting that the counter sum equals cacheService.epoch() presumes the epoch
#   starts at 0 and is only advanced through onAfter() - confirm against the service.
diff --git a/docs/changelog/132547.yaml b/docs/changelog/132547.yaml new file mode 100644 index 0000000000000..c04c15e65e482 --- /dev/null +++ b/docs/changelog/132547.yaml @@ -0,0 +1,5 @@ +pr: 132547 +summary: Add epoch blob-cache metric +area: Searchable Snapshots +type: enhancement +issues: [] diff --git a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/BlobCacheMetrics.java b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/BlobCacheMetrics.java index 85f95a32316d9..8b1beb021355a 100644 --- a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/BlobCacheMetrics.java +++ b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/BlobCacheMetrics.java @@ -42,6 +42,7 @@ public class BlobCacheMetrics { private final LongAdder missCount = new LongAdder(); private final LongAdder readCount = new LongAdder(); + private final LongCounter epochChanges; public enum CachePopulationReason { /** @@ -98,7 +99,8 @@ public BlobCacheMetrics(MeterRegistry meterRegistry) { "es.blob_cache.population.time.total", "The time spent copying data into the cache", "milliseconds" - ) + ), + meterRegistry.registerLongCounter("es.blob_cache.epoch.total", "The epoch changes of the LFU cache", "count") ); meterRegistry.registerLongGauge( @@ -134,7 +136,8 @@ public BlobCacheMetrics(MeterRegistry meterRegistry) { LongHistogram cacheMissLoadTimes, DoubleHistogram cachePopulationThroughput, LongCounter cachePopulationBytes, - LongCounter cachePopulationTime + LongCounter cachePopulationTime, + LongCounter epochChanges ) { this.cacheMissCounter = cacheMissCounter; this.evictedCountNonZeroFrequency = evictedCountNonZeroFrequency; @@ -143,6 +146,7 @@ public BlobCacheMetrics(MeterRegistry meterRegistry) { this.cachePopulationThroughput = cachePopulationThroughput; this.cachePopulationBytes = cachePopulationBytes; this.cachePopulationTime = cachePopulationTime; + this.epochChanges = epochChanges; } public static final BlobCacheMetrics NOOP = 
new BlobCacheMetrics(TelemetryProvider.NOOP.getMeterRegistry()); @@ -201,6 +205,10 @@ public void recordCachePopulationMetrics( } } + public void recordEpochChange() { + epochChanges.increment(); + } + public void recordRead() { readCount.increment(); } diff --git a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java index b44d3536ba902..8c04afba0c1c8 100644 --- a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java +++ b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java @@ -2049,6 +2049,7 @@ public void onFailure(Exception e) { public void onAfter() { assert pendingEpoch.get() == epoch.get() + 1; epoch.incrementAndGet(); + blobCacheMetrics.recordEpochChange(); } @Override diff --git a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java index 3a64430e7c3d2..51161e0f5f454 100644 --- a/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java +++ b/x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java @@ -35,6 +35,7 @@ import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.node.NodeRoleSettings; import org.elasticsearch.telemetry.InstrumentType; +import org.elasticsearch.telemetry.Measurement; import org.elasticsearch.telemetry.RecordingMeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; @@ -332,6 +333,8 @@ public void testAsynchronousEviction() throws Exception { } public void testDecay() throws IOException { + RecordingMeterRegistry recordingMeterRegistry = new 
RecordingMeterRegistry(); + BlobCacheMetrics metrics = new BlobCacheMetrics(recordingMeterRegistry); // we have 8 regions Settings settings = Settings.builder() .put(NODE_NAME_SETTING.getKey(), "node") @@ -347,7 +350,7 @@ public void testDecay() throws IOException { settings, taskQueue.getThreadPool(), taskQueue.getThreadPool().executor(ThreadPool.Names.GENERIC), - BlobCacheMetrics.NOOP + metrics ) ) { assertEquals(4, cacheService.freeRegionCount()); @@ -375,6 +378,8 @@ public void testDecay() throws IOException { assertThat(taskQueue.hasRunnableTasks(), is(true)); taskQueue.runAllRunnableTasks(); assertThat(cacheService.epoch(), equalTo(expectedEpoch.incrementAndGet())); + long epochs = recordedEpochs(recordingMeterRegistry); + assertEquals(cacheService.epoch(), epochs); }; triggerDecay.run(); @@ -435,11 +440,22 @@ public void testDecay() throws IOException { } } + private static long recordedEpochs(RecordingMeterRegistry recordingMeterRegistry) { + long epochs = recordingMeterRegistry.getRecorder() + .getMeasurements(InstrumentType.LONG_COUNTER, "es.blob_cache.epoch.total") + .stream() + .mapToLong(Measurement::getLong) + .sum(); + return epochs; + } + /** * Test when many objects need to decay, in particular useful to measure how long the decay task takes. * For 1M objects (with no assertions) it took 26ms locally. */ public void testMassiveDecay() throws IOException { + RecordingMeterRegistry recordingMeterRegistry = new RecordingMeterRegistry(); + BlobCacheMetrics metrics = new BlobCacheMetrics(recordingMeterRegistry); int regions = 1024; // to measure decay time, increase to 1024*1024 and disable assertions. 
Settings settings = Settings.builder() .put(NODE_NAME_SETTING.getKey(), "node") @@ -455,7 +471,7 @@ public void testMassiveDecay() throws IOException { settings, taskQueue.getThreadPool(), taskQueue.getThreadPool().executor(ThreadPool.Names.GENERIC), - BlobCacheMetrics.NOOP + metrics ) ) { Runnable decay = () -> { @@ -496,6 +512,9 @@ public void testMassiveDecay() throws IOException { } } assertThat(freqs.get(4), equalTo(regions - maxRounds + 1)); + + long epochs = recordedEpochs(recordingMeterRegistry); + assertEquals(cacheService.epoch(), epochs); } }