Skip to content

Commit 33cbb8e

Browse files
authored
Add more logging to the real memory circuit breaker and lower minimum interval (#102396) (#102443)
It lowers the minimumInterval from 5000ms to 500ms as we observed high number of CBEs when there are bursting allocations in newer JDKs.
1 parent 5bb05c3 commit 33cbb8e

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

docs/changelog/102396.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 102396
2+
summary: Add more logging to the real memory circuit breaker and lower minimum interval
3+
area: "Infra/Circuit Breakers"
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerService.java

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ static OverLimitStrategy createOverLimitStrategy(boolean trackRealMemoryUsage) {
520520
HierarchyCircuitBreakerService::realMemoryUsage,
521521
createYoungGcCountSupplier(),
522522
System::currentTimeMillis,
523-
5000,
523+
500,
524524
lockTimeout
525525
);
526526
} else {
@@ -561,6 +561,8 @@ static class G1OverLimitStrategy implements OverLimitStrategy {
561561

562562
private long blackHole;
563563
private final ReleasableLock lock = new ReleasableLock(new ReentrantLock());
564+
// used to throttle logging
565+
private int attemptNo;
564566

565567
G1OverLimitStrategy(
566568
JvmInfo jvmInfo,
@@ -607,9 +609,12 @@ public MemoryUsage overLimit(MemoryUsage memoryUsed) {
607609
boolean leader = false;
608610
int allocationIndex = 0;
609611
long allocationDuration = 0;
612+
long begin = 0;
613+
int attemptNoCopy = 0;
610614
try (ReleasableLock locked = lock.tryAcquire(lockTimeout)) {
611615
if (locked != null) {
612-
long begin = timeSupplier.getAsLong();
616+
attemptNoCopy = ++this.attemptNo;
617+
begin = timeSupplier.getAsLong();
613618
leader = begin >= lastCheckTime + minimumInterval;
614619
overLimitTriggered(leader);
615620
if (leader) {
@@ -641,9 +646,11 @@ public MemoryUsage overLimit(MemoryUsage memoryUsed) {
641646
long now = timeSupplier.getAsLong();
642647
this.lastCheckTime = now;
643648
allocationDuration = now - begin;
649+
this.attemptNo = 0;
644650
}
645651
}
646652
} catch (InterruptedException e) {
653+
logger.info("could not acquire lock when attempting to trigger G1GC due to high heap usage");
647654
Thread.currentThread().interrupt();
648655
// fallthrough
649656
}
@@ -658,6 +665,13 @@ public MemoryUsage overLimit(MemoryUsage memoryUsed) {
658665
allocationIndex,
659666
allocationDuration
660667
);
668+
} else if (attemptNoCopy < 10 || Long.bitCount(attemptNoCopy) == 1) {
669+
logger.info(
670+
"memory usage down after [{}], before [{}], after [{}]",
671+
begin - lastCheckTime,
672+
memoryUsed.baseUsage,
673+
current
674+
);
661675
}
662676
return new MemoryUsage(
663677
current,
@@ -674,6 +688,13 @@ public MemoryUsage overLimit(MemoryUsage memoryUsed) {
674688
allocationIndex,
675689
allocationDuration
676690
);
691+
} else if (attemptNoCopy < 10 || Long.bitCount(attemptNoCopy) == 1) {
692+
logger.info(
693+
"memory usage not down after [{}], before [{}], after [{}]",
694+
begin - lastCheckTime,
695+
memoryUsed.baseUsage,
696+
current
697+
);
677698
}
678699
// prefer original measurement when reporting if heap usage was not brought down.
679700
return memoryUsed;

0 commit comments

Comments
 (0)