Skip to content

Commit b8d8227

Browse files
authored
[Test] Reduce amount of work and concurrency for test stability (#91489) (#91534)
The test fails from time to time because not every thread finishes its work within the 1-second wait time. It is possible that the work simply takes too long to complete on occasion. The getBitSet method internally acquires a read lock, has concurrent read/write access to the cache, a ConcurrentHashMap, and an AtomicLong, and makes a log call that may go to disk. Any of these can stall, and the delays can accumulate to more than 1 second. This PR reduces the amount of work by decreasing the number of loops and reduces the concurrency by lowering the maximum number of concurrent threads. It also catches any exception thrown in the worker threads and reports it when the test fails (otherwise, the exception is swallowed and the final failure shows only a timeout). The test took quite long (1m20s locally) to complete even when concurrentThreads and numberOfIndices were randomized to their minimums (5 and 3 respectively). With the reduction, it now takes 28 seconds, which is also a gain. Given how long the test takes, it is also viable to bump the wait time in the future should it fail again. Resolves: #91471
1 parent 69a6011 commit b8d8227

File tree

1 file changed

+26
-15
lines changed

1 file changed

+26
-15
lines changed

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -359,13 +359,12 @@ public void testIndexLookupIsClearedWhenBitSetIsEvicted() throws Exception {
359359
});
360360
}
361361

362-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/91471")
363362
public void testCacheUnderConcurrentAccess() throws Exception {
364363
// This value is based on the internal implementation details of lucene's FixedBitSet
365364
// If the implementation changes, this can be safely updated to match the new ram usage for a single bitset
366365
final long expectedBytesPerBitSet = 56;
367366

368-
final int concurrentThreads = randomIntBetween(5, 15);
367+
final int concurrentThreads = randomIntBetween(5, 8);
369368
final int numberOfIndices = randomIntBetween(3, 8);
370369

371370
// Force cache evictions by setting the size to be less than the number of distinct queries we search on.
@@ -396,27 +395,39 @@ public void testCacheUnderConcurrentAccess() throws Exception {
396395
final CountDownLatch start = new CountDownLatch(concurrentThreads);
397396
final CountDownLatch end = new CountDownLatch(concurrentThreads);
398397
final Set<BitSet> uniqueBitSets = Collections.synchronizedSet(Collections.newSetFromMap(new IdentityHashMap<>()));
398+
final AtomicReference<Throwable> exceptionInThread = new AtomicReference<>();
399399
for (int thread = 0; thread < concurrentThreads; thread++) {
400400
threads.submit(() -> {
401-
start.countDown();
402-
start.await(100, TimeUnit.MILLISECONDS);
403-
for (int loop = 0; loop < 15; loop++) {
404-
for (int field = 1; field <= FIELD_COUNT; field++) {
405-
final TermQueryBuilder queryBuilder = QueryBuilders.termQuery("field-" + field, "value-" + field);
406-
final TestIndexContext randomContext = randomFrom(contexts);
407-
final Query query = queryBuilder.toQuery(randomContext.searchExecutionContext);
408-
final BitSet bitSet = cache.getBitSet(query, randomContext.leafReaderContext);
409-
assertThat(bitSet, notNullValue());
410-
assertThat(bitSet.ramBytesUsed(), equalTo(expectedBytesPerBitSet));
411-
uniqueBitSets.add(bitSet);
401+
try {
402+
start.countDown();
403+
if (false == start.await(100, TimeUnit.MILLISECONDS)) {
404+
// We still proceed even when some threads are not ready. All threads being ready increases the chance
405+
// of them running concurrently and competing for caching. But this is not guaranteed either way.
406+
logger.info("[{}] out of [{}] worker threads are ready", start.getCount(), concurrentThreads);
412407
}
408+
for (int loop = 0; loop < 5; loop++) {
409+
for (int field = 1; field <= FIELD_COUNT; field++) {
410+
final TermQueryBuilder queryBuilder = QueryBuilders.termQuery("field-" + field, "value-" + field);
411+
final TestIndexContext randomContext = randomFrom(contexts);
412+
final Query query = queryBuilder.toQuery(randomContext.searchExecutionContext);
413+
final BitSet bitSet = cache.getBitSet(query, randomContext.leafReaderContext);
414+
assertThat(bitSet, notNullValue());
415+
assertThat(bitSet.ramBytesUsed(), equalTo(expectedBytesPerBitSet));
416+
uniqueBitSets.add(bitSet);
417+
}
418+
}
419+
end.countDown();
420+
} catch (Throwable e) {
421+
logger.warn("caught exception in worker thread", e);
422+
exceptionInThread.compareAndSet(null, e);
413423
}
414-
end.countDown();
415424
return null;
416425
});
417426
}
418427

419-
assertTrue("Query threads did not complete in expected time", end.await(1, TimeUnit.SECONDS));
428+
if (false == end.await(1, TimeUnit.SECONDS)) {
429+
fail("Query threads did not complete in expected time. Possible exception [" + exceptionInThread.get() + "]");
430+
}
420431

421432
threads.shutdown();
422433
assertTrue("Cleanup thread did not complete in expected time", threads.awaitTermination(3, TimeUnit.SECONDS));

0 commit comments

Comments
 (0)