Skip to content

Commit 397821e

Browse files
Add epoch blob-cache metric (#132547)
Add the number of epoch changes as a metric to the blob-cache, giving visibility into how quickly the cache is changing and providing an indication of cache churn.
1 parent a684ef9 commit 397821e

File tree

4 files changed

+37
-4
lines changed

4 files changed

+37
-4
lines changed

docs/changelog/132547.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132547
2+
summary: Add epoch blob-cache metric
3+
area: Searchable Snapshots
4+
type: enhancement
5+
issues: []

x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/BlobCacheMetrics.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public class BlobCacheMetrics {
4242

4343
private final LongAdder missCount = new LongAdder();
4444
private final LongAdder readCount = new LongAdder();
45+
private final LongCounter epochChanges;
4546

4647
public enum CachePopulationReason {
4748
/**
@@ -98,7 +99,8 @@ public BlobCacheMetrics(MeterRegistry meterRegistry) {
9899
"es.blob_cache.population.time.total",
99100
"The time spent copying data into the cache",
100101
"milliseconds"
101-
)
102+
),
103+
meterRegistry.registerLongCounter("es.blob_cache.epoch.total", "The epoch changes of the LFU cache", "count")
102104
);
103105

104106
meterRegistry.registerLongGauge(
@@ -134,7 +136,8 @@ public BlobCacheMetrics(MeterRegistry meterRegistry) {
134136
LongHistogram cacheMissLoadTimes,
135137
DoubleHistogram cachePopulationThroughput,
136138
LongCounter cachePopulationBytes,
137-
LongCounter cachePopulationTime
139+
LongCounter cachePopulationTime,
140+
LongCounter epochChanges
138141
) {
139142
this.cacheMissCounter = cacheMissCounter;
140143
this.evictedCountNonZeroFrequency = evictedCountNonZeroFrequency;
@@ -143,6 +146,7 @@ public BlobCacheMetrics(MeterRegistry meterRegistry) {
143146
this.cachePopulationThroughput = cachePopulationThroughput;
144147
this.cachePopulationBytes = cachePopulationBytes;
145148
this.cachePopulationTime = cachePopulationTime;
149+
this.epochChanges = epochChanges;
146150
}
147151

148152
public static final BlobCacheMetrics NOOP = new BlobCacheMetrics(TelemetryProvider.NOOP.getMeterRegistry());
@@ -201,6 +205,10 @@ public void recordCachePopulationMetrics(
201205
}
202206
}
203207

208+
public void recordEpochChange() {
209+
epochChanges.increment();
210+
}
211+
204212
public void recordRead() {
205213
readCount.increment();
206214
}

x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/shared/SharedBlobCacheService.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2049,6 +2049,7 @@ public void onFailure(Exception e) {
20492049
public void onAfter() {
20502050
assert pendingEpoch.get() == epoch.get() + 1;
20512051
epoch.incrementAndGet();
2052+
blobCacheMetrics.recordEpochChange();
20522053
}
20532054

20542055
@Override

x-pack/plugin/blob-cache/src/test/java/org/elasticsearch/blobcache/shared/SharedBlobCacheServiceTests.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.env.TestEnvironment;
3636
import org.elasticsearch.node.NodeRoleSettings;
3737
import org.elasticsearch.telemetry.InstrumentType;
38+
import org.elasticsearch.telemetry.Measurement;
3839
import org.elasticsearch.telemetry.RecordingMeterRegistry;
3940
import org.elasticsearch.test.ESTestCase;
4041
import org.elasticsearch.threadpool.TestThreadPool;
@@ -332,6 +333,8 @@ public void testAsynchronousEviction() throws Exception {
332333
}
333334

334335
public void testDecay() throws IOException {
336+
RecordingMeterRegistry recordingMeterRegistry = new RecordingMeterRegistry();
337+
BlobCacheMetrics metrics = new BlobCacheMetrics(recordingMeterRegistry);
335338
// we have 8 regions
336339
Settings settings = Settings.builder()
337340
.put(NODE_NAME_SETTING.getKey(), "node")
@@ -347,7 +350,7 @@ public void testDecay() throws IOException {
347350
settings,
348351
taskQueue.getThreadPool(),
349352
taskQueue.getThreadPool().executor(ThreadPool.Names.GENERIC),
350-
BlobCacheMetrics.NOOP
353+
metrics
351354
)
352355
) {
353356
assertEquals(4, cacheService.freeRegionCount());
@@ -375,6 +378,8 @@ public void testDecay() throws IOException {
375378
assertThat(taskQueue.hasRunnableTasks(), is(true));
376379
taskQueue.runAllRunnableTasks();
377380
assertThat(cacheService.epoch(), equalTo(expectedEpoch.incrementAndGet()));
381+
long epochs = recordedEpochs(recordingMeterRegistry);
382+
assertEquals(cacheService.epoch(), epochs);
378383
};
379384

380385
triggerDecay.run();
@@ -435,11 +440,22 @@ public void testDecay() throws IOException {
435440
}
436441
}
437442

443+
private static long recordedEpochs(RecordingMeterRegistry recordingMeterRegistry) {
444+
long epochs = recordingMeterRegistry.getRecorder()
445+
.getMeasurements(InstrumentType.LONG_COUNTER, "es.blob_cache.epoch.total")
446+
.stream()
447+
.mapToLong(Measurement::getLong)
448+
.sum();
449+
return epochs;
450+
}
451+
438452
/**
439453
* Test when many objects need to decay, in particular useful to measure how long the decay task takes.
440454
* For 1M objects (with no assertions) it took 26ms locally.
441455
*/
442456
public void testMassiveDecay() throws IOException {
457+
RecordingMeterRegistry recordingMeterRegistry = new RecordingMeterRegistry();
458+
BlobCacheMetrics metrics = new BlobCacheMetrics(recordingMeterRegistry);
443459
int regions = 1024; // to measure decay time, increase to 1024*1024 and disable assertions.
444460
Settings settings = Settings.builder()
445461
.put(NODE_NAME_SETTING.getKey(), "node")
@@ -455,7 +471,7 @@ public void testMassiveDecay() throws IOException {
455471
settings,
456472
taskQueue.getThreadPool(),
457473
taskQueue.getThreadPool().executor(ThreadPool.Names.GENERIC),
458-
BlobCacheMetrics.NOOP
474+
metrics
459475
)
460476
) {
461477
Runnable decay = () -> {
@@ -496,6 +512,9 @@ public void testMassiveDecay() throws IOException {
496512
}
497513
}
498514
assertThat(freqs.get(4), equalTo(regions - maxRounds + 1));
515+
516+
long epochs = recordedEpochs(recordingMeterRegistry);
517+
assertEquals(cacheService.epoch(), epochs);
499518
}
500519
}
501520

0 commit comments

Comments
 (0)