Skip to content

Commit da9f6d9

Browse files
committed
[GR-59133] Minimum RSS threshold to do background GC
PullRequest: graalpython/3523
2 parents baec9ea + 8dec8c0 commit da9f6d9

File tree

3 files changed

+42
-43
lines changed

3 files changed

+42
-43
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,9 @@ private static final class BackgroundGCTask implements Runnable {
607607

608608
private BackgroundGCTask(PythonContext context) {
609609
this.ctx = new WeakReference<>(context);
610+
this.rssInterval = context.getOption(PythonOptions.BackgroundGCTaskInterval);
611+
this.gcRSSThreshold = context.getOption(PythonOptions.BackgroundGCTaskThreshold) / (double) 100;
612+
this.gcRSSMinimum = context.getOption(PythonOptions.BackgroundGCTaskMinimum);
610613
}
611614

612615
Object nativeSymbol = null;
@@ -619,21 +622,26 @@ private BackgroundGCTask(PythonContext context) {
619622
final WeakReference<PythonContext> ctx;
620623

621624
// RSS monitor interval in ms
622-
static final int RSS_INTERVAL = Integer.getInteger("python.RSSInterval", 1000);
625+
final int rssInterval;
623626
/**
624627
* RSS percentage increase between System.gc() calls. Low percentage will trigger
625628
* System.gc() more often which can cause unnecessary overhead.
626629
*
627630
* <ul>
628-
* why 30%? it's purely based on the {@code huggingface} example.
631+
* Based on the {@code huggingface} example:
629632
* <li>less than 30%: max RSS ~22GB (>200 seconds per iteration)</li>
630633
* <li>30%: max RSS ~24GB (~150 seconds per iteration)</li>
631634
* <li>larger than 30%: max RSS ~38GB (~140 seconds per iteration)</li>
632635
* </ul>
633636
*
634637
* <pre>
635638
*/
636-
static final double GC_RSS_THRESHOLD = Integer.getInteger("python.RSSThreshold", 30) / 100.0;
639+
final double gcRSSThreshold;
640+
641+
/**
642+
* Minimum RSS (in megabytes) that must be reached before the task starts calling System.gc(). Default is 4GB.
643+
*/
644+
final double gcRSSMinimum;
637645

638646
Long getCurrentRSS() {
639647
if (nativeSymbol == null) {
@@ -652,7 +660,7 @@ Long getCurrentRSS() {
652660
public void run() {
653661
try {
654662
while (true) {
655-
Thread.sleep(RSS_INTERVAL);
663+
Thread.sleep(rssInterval);
656664
perform();
657665
}
658666
} catch (InterruptedException e) {
@@ -680,6 +688,10 @@ private void perform() {
680688
return;
681689
}
682690

691+
if (rss < gcRSSMinimum) {
692+
return;
693+
}
694+
683695
// skip GC if no new native weakrefs have been created.
684696
int currentWeakrefCount = context.nativeContext.nativeLookup.size();
685697
if (currentWeakrefCount < this.previousWeakrefCount || this.previousWeakrefCount == -1) {
@@ -688,15 +700,15 @@ private void perform() {
688700
}
689701

690702
double ratio = ((rss - this.previousRSS) / (double) this.previousRSS);
691-
if (ratio >= GC_RSS_THRESHOLD) {
703+
if (ratio >= gcRSSThreshold) {
692704
this.previousWeakrefCount = currentWeakrefCount;
693705

694706
long start = System.nanoTime();
695707
PythonUtils.forceFullGC();
696708
long gcTime = (System.nanoTime() - start) / 1000000;
697709

698710
if (LOGGER.isLoggable(Level.FINER)) {
699-
LOGGER.finer(PythonUtils.formatJString("Background GC Task -- GC [%d ms] RSS [%d MB]->[%d MB](%.1f%%)",
711+
LOGGER.info(PythonUtils.formatJString("Background GC Task -- GC [%d ms] RSS [%d MB]->[%d MB](%.1f%%)",
700712
gcTime, previousRSS, rss, ratio * 100));
701713
}
702714
/*
@@ -710,7 +722,7 @@ private void perform() {
710722
* mappings (RssFile) and shmem memory (RssShmem). GC can only reduce RssAnon while
711723
* RssFile is managed by the operating system which doesn't go down easily.
712724
*/
713-
this.previousRSS += (long) (this.previousRSS * GC_RSS_THRESHOLD);
725+
this.previousRSS += (long) (this.previousRSS * gcRSSThreshold);
714726
}
715727
}
716728
}
@@ -722,7 +734,7 @@ public long getCurrentRSS() {
722734
if (rss == -1) {
723735
try {
724736
// in case it just started
725-
Thread.sleep(BackgroundGCTask.RSS_INTERVAL);
737+
Thread.sleep(gcTask.rssInterval);
726738
} catch (InterruptedException e) {
727739
Thread.currentThread().interrupt();
728740
}
@@ -736,15 +748,16 @@ public long getCurrentRSS() {
736748
@SuppressFBWarnings(value = "NP_NULL_ON_SOME_PATH") // context.get() is never null here
737749
void runBackgroundGCTask(PythonContext context) {
738750
CompilerAsserts.neverPartOfCompilation();
739-
if (ImageInfo.inImageBuildtimeCode() || context.getOption(PythonOptions.NoAsyncActions)) {
751+
if (ImageInfo.inImageBuildtimeCode() //
752+
|| context.getOption(PythonOptions.NoAsyncActions) //
753+
|| !PythonOptions.AUTOMATIC_ASYNC_ACTIONS //
754+
|| !context.getOption(PythonOptions.BackgroundGCTask)) {
740755
return;
741756
}
742-
if (PythonOptions.AUTOMATIC_ASYNC_ACTIONS) {
743-
backgroundGCTaskThread = context.getEnv().newTruffleThreadBuilder(gcTask).context(context.getEnv().getContext()).build();
744-
backgroundGCTaskThread.setDaemon(true);
745-
backgroundGCTaskThread.setName("python-gc-task");
746-
backgroundGCTaskThread.start();
747-
}
757+
backgroundGCTaskThread = context.getEnv().newTruffleThreadBuilder(gcTask).context(context.getEnv().getContext()).build();
758+
backgroundGCTaskThread.setDaemon(true);
759+
backgroundGCTaskThread.setName("python-gc-task");
760+
backgroundGCTaskThread.start();
748761
}
749762

750763
/**

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import java.util.Arrays;
4949
import java.util.HashMap;
5050
import java.util.HashSet;
51-
import java.util.LinkedList;
5251
import java.util.Set;
5352
import java.util.WeakHashMap;
5453
import java.util.concurrent.ConcurrentHashMap;
@@ -205,20 +204,10 @@ private static HandleContext getContext() {
205204

206205
public abstract static class IdReference<T> extends WeakReference<T> {
207206

208-
private boolean collected = false;
209-
210207
public IdReference(HandleContext handleContext, T referent) {
211208
super(referent, handleContext.referenceQueue);
212209
}
213210

214-
public boolean isCollected() {
215-
return collected;
216-
}
217-
218-
public IdReference<T> setCollected() {
219-
this.collected = true;
220-
return this;
221-
}
222211
}
223212

224213
/**
@@ -448,23 +437,8 @@ public static int pollReferenceQueue() {
448437
}
449438
}
450439
try {
451-
LinkedList<IdReference<?>> manualCleanupQueue = new LinkedList<>();
452-
for (IdReference<?> ref : handleContext.nativeLookup.values()) {
453-
if (ref != null && ref.refersTo(null)) {
454-
manualCleanupQueue.add(ref);
455-
}
456-
}
457-
manuallyCollected = manualCleanupQueue.size();
458440
while (true) {
459-
Object entry;
460-
if (!manualCleanupQueue.isEmpty()) {
461-
entry = manualCleanupQueue.pop().setCollected();
462-
} else {
463-
entry = queue.poll();
464-
if (entry instanceof IdReference<?> ref && ref.isCollected()) {
465-
continue;
466-
}
467-
}
441+
Object entry = queue.poll();
468442
if (entry == null) {
469443
if (count > 0) {
470444
assert handleContext.referenceQueuePollActive;

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import java.util.Map;
4545
import java.util.Optional;
4646

47-
import com.oracle.truffle.api.exception.AbstractTruffleException;
4847
import org.graalvm.options.OptionCategory;
4948
import org.graalvm.options.OptionDescriptor;
5049
import org.graalvm.options.OptionDescriptors;
@@ -64,6 +63,7 @@
6463
import com.oracle.truffle.api.Option;
6564
import com.oracle.truffle.api.TruffleLanguage.Env;
6665
import com.oracle.truffle.api.dsl.Idempotent;
66+
import com.oracle.truffle.api.exception.AbstractTruffleException;
6767
import com.oracle.truffle.api.nodes.ExplodeLoop;
6868
import com.oracle.truffle.api.strings.TruffleString;
6969

@@ -354,6 +354,18 @@ private PythonOptions() {
354354
@Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Whether the Python GC should be enabled (default) or not.") //
355355
public static final OptionKey<Boolean> PythonGC = new OptionKey<>(true);
356356

357+
@Option(category = OptionCategory.INTERNAL, usageSyntax = "true|false", help = "Whether the background GC task should be enabled (default) or not.") //
358+
public static final OptionKey<Boolean> BackgroundGCTask = new OptionKey<>(true);
359+
360+
@Option(category = OptionCategory.INTERNAL, usageSyntax = "<time>", help = "Specifies the interval (ms) for the background GC task to monitor the resident set size (RSS)") //
361+
public static final OptionKey<Integer> BackgroundGCTaskInterval = new OptionKey<>(1000);
362+
363+
@Option(category = OptionCategory.INTERNAL, usageSyntax = "<limit>", help = "The percentage increase in RSS memory between System.gc() calls. Low percentage will trigger System.gc() more often. (default: 30).") //
364+
public static final OptionKey<Integer> BackgroundGCTaskThreshold = new OptionKey<>(30);
365+
366+
@Option(category = OptionCategory.INTERNAL, usageSyntax = "<megabytes>", help = "The minimum RSS memory (in megabytes) to start calling System.gc(). (default: 4 GB).") //
367+
public static final OptionKey<Integer> BackgroundGCTaskMinimum = new OptionKey<>(4096);
368+
357369
@EngineOption @Option(category = OptionCategory.USER, usageSyntax = "true|false", help = "Emulate some Jython features that can cause performance degradation", stability = OptionStability.STABLE) //
358370
public static final OptionKey<Boolean> EmulateJython = new OptionKey<>(false);
359371

0 commit comments

Comments
 (0)