Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/133314.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 133314
summary: Add hits and misses timing stats to DLS cache
area: Authorization
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,11 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.LongSupplier;

/**
* This is a cache for {@link BitSet} instances that are used with the {@link DocumentSubsetReader}.
Expand Down Expand Up @@ -122,18 +125,27 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
private final Cache<BitsetCacheKey, BitSet> bitsetCache;
private final Map<IndexReader.CacheKey, Set<BitsetCacheKey>> keysByIndex;
private final AtomicLong cacheFullWarningTime;
private final LongAdder hitsTimeInNanos;
private final LongAdder missesTimeInNanos;
private final LongSupplier relativeNanoTimeProvider;

public DocumentSubsetBitsetCache(Settings settings, ThreadPool threadPool) {
this(settings, threadPool.executor(ThreadPool.Names.GENERIC));
}

// visible for testing
DocumentSubsetBitsetCache(Settings settings, ExecutorService cleanupExecutor) {
this(settings, cleanupExecutor, System::nanoTime);
}

/**
* @param settings The global settings object for this node
* @param cleanupExecutor An executor on which the cache cleanup tasks can be run. Due to the way the cache is structured internally,
* it is sometimes necessary to run an asynchronous task to synchronize the internal state.
* @param relativeNanoTimeProvider Provider of nanos for code that needs to measure relative time.
*/
// visible for testing
DocumentSubsetBitsetCache(Settings settings, ExecutorService cleanupExecutor) {
DocumentSubsetBitsetCache(Settings settings, ExecutorService cleanupExecutor, LongSupplier relativeNanoTimeProvider) {
final ReentrantReadWriteLock readWriteLock = new ReentrantReadWriteLock();
this.cacheEvictionLock = new ReleasableLock(readWriteLock.writeLock());
this.cacheModificationLock = new ReleasableLock(readWriteLock.readLock());
Expand All @@ -150,6 +162,9 @@ public DocumentSubsetBitsetCache(Settings settings, ThreadPool threadPool) {

this.keysByIndex = new ConcurrentHashMap<>();
this.cacheFullWarningTime = new AtomicLong(0);
this.hitsTimeInNanos = new LongAdder();
this.missesTimeInNanos = new LongAdder();
this.relativeNanoTimeProvider = Objects.requireNonNull(relativeNanoTimeProvider);
}

@Override
Expand Down Expand Up @@ -222,6 +237,8 @@ public long ramBytesUsed() {
*/
@Nullable
public BitSet getBitSet(final Query query, final LeafReaderContext context) throws ExecutionException {
final long startTimeNs = relativeNanoTimeProvider.getAsLong();

final IndexReader.CacheHelper coreCacheHelper = context.reader().getCoreCacheHelper();
if (coreCacheHelper == null) {
try {
Expand All @@ -235,7 +252,9 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
final BitsetCacheKey cacheKey = new BitsetCacheKey(indexKey, query);

try (ReleasableLock ignored = cacheModificationLock.acquire()) {
final AtomicBoolean cacheKeyWasPresent = new AtomicBoolean(true);
Copy link
Contributor

@joegallo joegallo Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not introduce an AtomicBoolean for this. You're single threaded here anyway (right?), so this could just be an ordinary old mutable boolean rather than a final. Beyond that, though, I suspect I'd prefer if this were done by threading the logic in where it needs to be rather than separating it (and needing a variable at all). edit: disregard that, the other places where we don't do this are patterned differently. It's interesting to me that these three caches have such different little details in some places.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, the problem is the callback and the need for it to be a final variable -- there's a pattern of using final 1-length arrays as final-but-mutable box for this.

Copy link
Contributor Author

@szybia szybia Aug 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yah seen the cacheKeyWasPresent[0] = false; approach before

no strong opinions so fine with it, but was my first time seeing it and was a bit confused initially

so went with this since more readable imo

i'd be curious to know whether there were any previous issues around AtomicBoolean, from ignorance and intuition given zero contention and this just being a object allocation, can't imagine many problems here

anyway 🤷

final BitSet bitSet = bitsetCache.computeIfAbsent(cacheKey, ignore1 -> {
cacheKeyWasPresent.set(false);
// This ensures all insertions into the set are guarded by ConcurrentHashMap's atomicity guarantees.
keysByIndex.compute(indexKey, (ignore2, set) -> {
if (set == null) {
Expand Down Expand Up @@ -264,6 +283,11 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
}
return result;
});
if (cacheKeyWasPresent.get()) {
hitsTimeInNanos.add(relativeNanoTimeProvider.getAsLong() - startTimeNs);
} else {
missesTimeInNanos.add(relativeNanoTimeProvider.getAsLong() - startTimeNs);
}
if (bitSet == NULL_MARKER) {
return null;
} else {
Expand Down Expand Up @@ -331,6 +355,8 @@ public Map<String, Object> usageStats() {
stats.put("hits", cacheStats.getHits());
stats.put("misses", cacheStats.getMisses());
stats.put("evictions", cacheStats.getEvictions());
stats.put("hits_time_in_millis", TimeValue.nsecToMSec(hitsTimeInNanos.sum()));
stats.put("misses_time_in_millis", TimeValue.nsecToMSec(missesTimeInNanos.sum()));
return Collections.unmodifiableMap(stats);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.LongSupplier;
import java.util.function.Supplier;

import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -536,6 +538,8 @@ public void testHitsMissesAndEvictionsStats() throws Exception {
stats.put("hits", 0L);
stats.put("misses", 0L);
stats.put("evictions", 0L);
stats.put("hits_time_in_millis", 0L);
stats.put("misses_time_in_millis", 0L);
return stats;
};

Expand All @@ -551,12 +555,14 @@ public void testHitsMissesAndEvictionsStats() throws Exception {
expectedStats.put("misses", 1L);
expectedStats.put("memory", EXPECTED_BYTES_PER_BIT_SET + "b");
expectedStats.put("memory_in_bytes", EXPECTED_BYTES_PER_BIT_SET);
expectedStats.put("misses_time_in_millis", 1L);
assertThat(cache.usageStats(), equalTo(expectedStats));

// second same lookup - hit
final BitSet bitSet1Again = cache.getBitSet(query1, leafContext);
assertThat(bitSet1Again, sameInstance(bitSet1));
expectedStats.put("hits", 1L);
expectedStats.put("hits_time_in_millis", 1L);
assertThat(cache.usageStats(), equalTo(expectedStats));

// second query - miss, should evict the first one
Expand All @@ -568,13 +574,18 @@ public void testHitsMissesAndEvictionsStats() throws Exception {
// see https://github.com/elastic/elasticsearch/issues/132842
expectedStats.put("misses", 3L);
expectedStats.put("evictions", 1L);
// underlying Cache class tracks hits/misses, but timing is in DLS cache, which is why we have `2L` here,
// because DLS cache is only hit once
expectedStats.put("misses_time_in_millis", 2L);
assertBusy(() -> { assertThat(cache.usageStats(), equalTo(expectedStats)); }, 200, TimeUnit.MILLISECONDS);
});

final Map<String, Object> finalStats = emptyStatsSupplier.get();
finalStats.put("hits", 1L);
finalStats.put("misses", 3L);
finalStats.put("evictions", 2L);
finalStats.put("hits_time_in_millis", 1L);
finalStats.put("misses_time_in_millis", 2L);
assertThat(cache.usageStats(), equalTo(finalStats));
}

Expand Down Expand Up @@ -697,6 +708,8 @@ private void runTestOnIndices(int numberIndices, CheckedConsumer<List<TestIndexC
}

private DocumentSubsetBitsetCache newCache(Settings settings) {
return new DocumentSubsetBitsetCache(settings, singleThreadExecutor);
final AtomicLong increasingMillisTime = new AtomicLong();
final LongSupplier relativeNanoTimeProvider = () -> TimeUnit.MILLISECONDS.toNanos(increasingMillisTime.getAndIncrement());
return new DocumentSubsetBitsetCache(settings, singleThreadExecutor, relativeNanoTimeProvider);
}
}