Skip to content

Commit 17b49aa

Browse files
Further fixes.
Improvements for the teardown failure/warning logics.
1 parent 9d3d859 commit 17b49aa

File tree

5 files changed

+152
-143
lines changed

5 files changed

+152
-143
lines changed

substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/SubstrateOptions.java

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
*/
2525
package com.oracle.svm.core;
2626

27-
import static com.oracle.svm.core.SubstrateOptions.DeprecatedOptions.TearDownFailureNanos;
2827
import static com.oracle.svm.core.option.RuntimeOptionKey.RuntimeOptionKeyFlag.Immutable;
2928
import static com.oracle.svm.core.option.RuntimeOptionKey.RuntimeOptionKeyFlag.RegisterForIsolateArgumentParser;
3029
import static com.oracle.svm.core.option.RuntimeOptionKey.RuntimeOptionKeyFlag.RelevantForCompilationIsolates;
@@ -618,17 +617,13 @@ protected void onValueUpdate(EconomicMap<OptionKey<?>, Object> values, Boolean o
618617
}
619618
};
620619

621-
public static final String TEAR_DOWN_WARNING_NANOS_ERROR = "Can't set both TearDownWarningSeconds and TearDownWarningNanos at the same time. Use TearDownWarningSeconds.";
622-
@Option(help = "The number of nanoseconds before and between which tearing down an isolate gives a warning message. 0 implies no warning.", //
620+
@Option(help = "The number of nanoseconds that the isolate teardown can take before warnings are printed. Disabled if less or equal to 0.", //
623621
deprecated = true, deprecationMessage = "Use -XX:TearDownWarningSeconds=<secs> instead")//
624-
public static final RuntimeOptionKey<Long> TearDownWarningNanos = new RuntimeOptionKey<>(0L,
625-
(key) -> UserError.guarantee(!(key.hasBeenSet() && TearDownWarningSeconds.hasBeenSet()), TEAR_DOWN_WARNING_NANOS_ERROR),
626-
RelevantForCompilationIsolates);
622+
public static final RuntimeOptionKey<Long> TearDownWarningNanos = new RuntimeOptionKey<>(0L, RelevantForCompilationIsolates);
627623

628-
@Option(help = "The number of nanoseconds before tearing down an isolate gives a failure message and returns from a tear-down call. 0 implies no message.", //
629-
deprecated = true, deprecationMessage = "This call leaks resources. Instead, terminate java threads cooperatively, or use System#exit")//
624+
@Option(help = "The number of nanoseconds that the isolate teardown can take before a fatal error is thrown. Disabled if less or equal to 0.", //
625+
deprecated = true, deprecationMessage = "Use -XX:TearDownFailureSeconds=<secs> instead")//
630626
public static final RuntimeOptionKey<Long> TearDownFailureNanos = new RuntimeOptionKey<>(0L, RelevantForCompilationIsolates);
631-
632627
}
633628

634629
@LayerVerifiedOption(kind = Kind.Changed, severity = Severity.Error)//
@@ -870,24 +865,18 @@ private static void validateZapNativeMemory(HostedOptionKey<Boolean> optionKey)
870865
}
871866
}
872867

873-
/*
874-
* Isolate tear down options.
875-
*/
876-
@Option(help = "The number of seconds before and between which tearing down an isolate gives a warning message. 0 implies no warning.")//
877-
public static final RuntimeOptionKey<Long> TearDownWarningSeconds = new RuntimeOptionKey<>(0L, RelevantForCompilationIsolates);
878-
879868
public static long getTearDownWarningNanos() {
880-
if (TearDownWarningSeconds.hasBeenSet() && DeprecatedOptions.TearDownWarningNanos.hasBeenSet()) {
881-
throw new IllegalArgumentException(DeprecatedOptions.TEAR_DOWN_WARNING_NANOS_ERROR);
882-
}
883-
if (DeprecatedOptions.TearDownWarningNanos.hasBeenSet()) {
884-
return DeprecatedOptions.TearDownWarningNanos.getValue();
869+
if (ConcealedOptions.TearDownWarningSeconds.getValue() != 0) {
870+
return TimeUtils.secondsToNanos(ConcealedOptions.TearDownWarningSeconds.getValue());
885871
}
886-
return TearDownWarningSeconds.getValue() * TimeUtils.nanosPerSecond;
872+
return DeprecatedOptions.TearDownWarningNanos.getValue();
887873
}
888874

889875
public static long getTearDownFailureNanos() {
890-
return TearDownFailureNanos.getValue();
876+
if (ConcealedOptions.TearDownFailureSeconds.getValue() != 0) {
877+
return TimeUtils.secondsToNanos(ConcealedOptions.TearDownFailureSeconds.getValue());
878+
}
879+
return DeprecatedOptions.TearDownFailureNanos.getValue();
891880
}
892881

893882
@Option(help = "Define the maximum number of stores for which the loop that zeroes out objects is unrolled.")//
@@ -1264,6 +1253,15 @@ protected void onValueUpdate(EconomicMap<OptionKey<?>, Object> values, Integer o
12641253
@Option(help = "Avoid linker relocations for code and instead emit address computations.", type = OptionType.Expert) //
12651254
@LayerVerifiedOption(severity = Severity.Error, kind = Kind.Changed, positional = false) //
12661255
public static final HostedOptionKey<Boolean> RelativeCodePointers = new HostedOptionKey<>(false, SubstrateOptions::validateRelativeCodePointers);
1256+
1257+
/** Use {@link SubstrateOptions#getTearDownWarningNanos()} instead. */
1258+
@Option(help = "The number of seconds that the isolate teardown can take before warnings are printed. Disabled if less or equal to 0.")//
1259+
public static final RuntimeOptionKey<Long> TearDownWarningSeconds = new RuntimeOptionKey<>(0L, RelevantForCompilationIsolates);
1260+
1261+
/** Use {@link SubstrateOptions#getTearDownFailureNanos()} instead. */
1262+
@Option(help = "The number of seconds that the isolate teardown can take before a fatal error is thrown. Disabled if less or equal to 0.")//
1263+
public static final RuntimeOptionKey<Long> TearDownFailureSeconds = new RuntimeOptionKey<>(0L, RelevantForCompilationIsolates);
1264+
12671265
}
12681266

12691267
@Option(help = "Overwrites the available number of processors provided by the OS. Any value <= 0 means using the processor count from the OS.")//

substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/graal/snippets/CEntryPointSnippets.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -671,9 +671,7 @@ public static int tearDownIsolateSnippet() {
671671
private static int tearDownIsolate() {
672672
try {
673673
/* Execute interruptible code. */
674-
if (!initiateTearDownIsolateInterruptibly()) {
675-
return CEntryPointErrors.UNSPECIFIED;
676-
}
674+
initiateTearDownIsolateInterruptibly();
677675

678676
/* After threadExit(), only uninterruptible code may be executed. */
679677
RecurringCallbackSupport.suspendCallbackTimer("Execution of arbitrary code is prohibited during the last teardown steps.");
@@ -722,13 +720,10 @@ private static int tearDownIsolate() {
722720
}
723721
}
724722

725-
@Uninterruptible(reason = "Tear-down in progress - still safe to execute interruptible Java code.", calleeMustBe = false)
726-
private static boolean initiateTearDownIsolateInterruptibly() {
723+
@Uninterruptible(reason = "Tear-down in progress - still safe to execute interruptible Java code.", callerMustBe = true, calleeMustBe = false)
724+
private static void initiateTearDownIsolateInterruptibly() {
727725
RuntimeSupport.executeTearDownHooks();
728-
if (!PlatformThreads.tearDownOtherThreads()) {
729-
return false;
730-
}
731-
726+
PlatformThreads.tearDownOtherThreads();
732727
/*
733728
* At this point, only the current thread, the VM operation thread, and the reference
734729
* handler thread are still running.
@@ -739,7 +734,6 @@ private static boolean initiateTearDownIsolateInterruptibly() {
739734

740735
VMThreads.singleton().threadExit();
741736
/* After threadExit(), only uninterruptible code may be executed. */
742-
return true;
743737
}
744738

745739
@Snippet(allowMissingProbabilities = true)

substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/thread/PlatformThreads.java

Lines changed: 98 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import java.util.concurrent.ForkJoinWorkerThread;
4545
import java.util.concurrent.ScheduledThreadPoolExecutor;
4646
import java.util.concurrent.ThreadPoolExecutor;
47-
import java.util.concurrent.atomic.AtomicBoolean;
4847
import java.util.concurrent.atomic.AtomicInteger;
4948

5049
import org.graalvm.nativeimage.CurrentIsolate;
@@ -550,8 +549,8 @@ public void closeOSThreadHandle(OSThreadHandle threadHandle) {
550549
* teardown logic (see {@link #isVMInternalThread(IsolateThread)}). Waits until the interrupted
551550
* threads detach.
552551
*/
553-
public static boolean tearDownOtherThreads() {
554-
final Log trace = Log.noopLog().string("[PlatformThreads.tearDownPlatformThreads:").newline().flush();
552+
public static void tearDownOtherThreads() {
553+
Log trace = Log.noopLog().string("[PlatformThreads.tearDownPlatformThreads:").newline().flush();
555554

556555
/*
557556
* Set tear-down flag for new Java threads that have already been started on an OS level,
@@ -626,46 +625,36 @@ public static boolean tearDownOtherThreads() {
626625
trace.string(" shutdown initiated: ").object(pool).newline().flush();
627626
}
628627

629-
final boolean result = waitForTearDown();
630-
trace.string(" returns: ").bool(result).string("]").newline().flush();
631-
return result;
628+
waitForTearDown();
632629
}
633630

634631
/** Wait (im)patiently for the thread list to drain. */
635-
private static boolean waitForTearDown() {
632+
private static void waitForTearDown() {
636633
assert !isVMInternalThread(CurrentIsolate.getCurrentThread()) : "we count the threads until only the current one remains";
637634

638-
final Log trace = Log.noopLog().string("[PlatformThreads.waitForTearDown:").newline();
639-
final long warningNanos = SubstrateOptions.getTearDownWarningNanos();
640-
final String warningMessage = "PlatformThreads.waitForTearDown is taking too long.";
641-
final long failureNanos = SubstrateOptions.getTearDownFailureNanos();
642-
final String failureMessage = "PlatformThreads.waitForTearDown took too long.";
643-
final long startNanos = System.nanoTime();
644-
long loopNanos = startNanos;
645-
final AtomicBoolean printLaggards = new AtomicBoolean(false);
646-
final Log counterLog = ((warningNanos == 0) ? trace : Log.log());
647-
final CheckReadyForTearDownOperation operation = new CheckReadyForTearDownOperation(counterLog, printLaggards);
648-
649-
for (; /* return */;) {
650-
final long previousLoopNanos = loopNanos;
635+
CheckReadyForTearDownOperation operation = new CheckReadyForTearDownOperation();
636+
long warningConfiguredNanos = SubstrateOptions.getTearDownWarningNanos();
637+
long failureConfiguredNanos = SubstrateOptions.getTearDownFailureNanos();
638+
long startNanos = System.nanoTime();
639+
long previousReportNanos = startNanos;
640+
641+
while (true) {
651642
operation.enqueue();
652643
if (operation.isReadyForTearDown()) {
653-
trace.string(" returns true]").newline();
654-
return true;
644+
return;
655645
}
656-
loopNanos = TimeUtils.doNotLoopTooLong(startNanos, loopNanos, warningNanos, warningMessage);
657-
final boolean fatallyTooLong = TimeUtils.maybeFatallyTooLong(startNanos, failureNanos, failureMessage);
658-
if (fatallyTooLong) {
659-
trace.string("Took too long to tear down the VM.").newline();
660-
/*
661-
* Debugging tip: Insert a `BreakpointNode.breakpoint()` here to stop in gdb or get
662-
* a core file with the thread stacks. Be careful about believing the stack traces,
663-
* though.
664-
*/
665-
return false;
646+
647+
long sinceStartNanos = TimeUtils.nanoSecondsSince(startNanos);
648+
if (failureConfiguredNanos > 0 && TimeUtils.nanoTimeLessThan(failureConfiguredNanos, sinceStartNanos)) {
649+
throw VMError.shouldNotReachHere("Took too long to tear down the VM.");
650+
}
651+
652+
long sinceReportNanos = TimeUtils.nanoSecondsSince(previousReportNanos);
653+
if (warningConfiguredNanos > 0 && TimeUtils.nanoTimeLessThan(warningConfiguredNanos, sinceReportNanos)) {
654+
operation.enablePrintLaggards();
655+
previousReportNanos += sinceReportNanos;
666656
}
667-
/* If I took too long, print the laggards next time around. */
668-
printLaggards.set(previousLoopNanos != loopNanos);
657+
669658
/* Loop impatiently waiting for threads to exit. */
670659
Thread.yield();
671660
}
@@ -1175,14 +1164,15 @@ public void operate() {
11751164
* holding the {@link VMThreads#THREAD_MUTEX}.
11761165
*/
11771166
private static class CheckReadyForTearDownOperation extends JavaVMOperation {
1178-
private final Log trace;
1179-
private final AtomicBoolean printLaggards;
1167+
private boolean printLaggards;
11801168
private boolean readyForTearDown;
11811169

1182-
CheckReadyForTearDownOperation(Log trace, AtomicBoolean printLaggards) {
1170+
CheckReadyForTearDownOperation() {
11831171
super(VMOperationInfos.get(CheckReadyForTearDownOperation.class, "Check ready for teardown", SystemEffect.NONE));
1184-
this.trace = trace;
1185-
this.printLaggards = printLaggards;
1172+
}
1173+
1174+
void enablePrintLaggards() {
1175+
printLaggards = true;
11861176
}
11871177

11881178
boolean isReadyForTearDown() {
@@ -1191,47 +1181,82 @@ boolean isReadyForTearDown() {
11911181

11921182
@Override
11931183
public void operate() {
1194-
int attachedCount = 0;
1195-
int unattachedStartedCount;
11961184
VMMutex lock = VMThreads.THREAD_MUTEX.lock();
11971185
try {
1198-
for (IsolateThread isolateThread = VMThreads.firstThread(); isolateThread.isNonNull(); isolateThread = VMThreads.nextThread(isolateThread)) {
1199-
if (isVMInternalThread(isolateThread)) {
1200-
continue;
1201-
}
1186+
readyForTearDown = isReadyForTeardown();
1187+
} finally {
1188+
lock.unlock();
1189+
}
1190+
}
12021191

1203-
attachedCount++;
1204-
if (printLaggards.get() && trace.isEnabled() && isolateThread != queuingThread) {
1205-
trace.string(" laggard isolateThread: ").hex(isolateThread);
1206-
final Thread thread = PlatformThreads.fromVMThread(isolateThread);
1207-
if (thread != null) {
1208-
final String name = thread.getName();
1209-
final Thread.State status = thread.getState();
1210-
final boolean interruptedStatus = JavaThreads.isInterrupted(thread);
1211-
trace.string(" thread.getName(): ").string(name)
1212-
.string(" interruptedStatus: ").bool(interruptedStatus)
1213-
.string(" getState(): ").string(status.name()).newline();
1214-
for (StackTraceElement e : thread.getStackTrace()) {
1215-
trace.string(e.toString()).newline();
1216-
}
1217-
}
1218-
trace.newline().flush();
1192+
private boolean isReadyForTeardown() {
1193+
int attachedCount = 0;
1194+
boolean printed = false;
1195+
1196+
for (IsolateThread thread = VMThreads.firstThread(); thread.isNonNull(); thread = VMThreads.nextThread(thread)) {
1197+
if (isVMInternalThread(thread)) {
1198+
continue;
1199+
}
1200+
1201+
attachedCount++;
1202+
1203+
/* Print some information about slow threads. */
1204+
if (printLaggards && thread != queuingThread) {
1205+
if (!printed) {
1206+
printed = true;
1207+
Log.log().string("Teardown is taking too long").redent(true);
12191208
}
1209+
1210+
printThreadInfo(Log.log(), thread);
12201211
}
1212+
}
12211213

1222-
/*
1223-
* Note: our counter for unattached started threads is not guarded by the threads
1224-
* mutex and its count could change or have changed within this block. Still, it is
1225-
* important that we hold the threads mutex when querying the counter value: a
1226-
* thread might start another thread and exit immediately after. By holding the
1227-
* threads lock, we prevent the exiting thread from detaching, and/or the starting
1228-
* thread from attaching, so we will never consider being ready for tear-down.
1229-
*/
1230-
unattachedStartedCount = singleton().unattachedStartedThreads.get();
1231-
} finally {
1232-
lock.unlock();
1214+
if (printed) {
1215+
Log.log().indent(false);
1216+
}
1217+
1218+
/*
1219+
* Note: our counter for unattached started threads is not guarded by the threads mutex
1220+
* and its count could change or have changed within this block. Still, it is important
1221+
* that we hold the threads mutex when querying the counter value: a thread might start
1222+
* another thread and exit immediately after. By holding the threads lock, we prevent
1223+
* the exiting thread from detaching, and/or the starting thread from attaching, so we
1224+
* will never consider being ready for tear-down.
1225+
*/
1226+
int unattachedStartedCount = singleton().unattachedStartedThreads.get();
1227+
1228+
printLaggards = false;
1229+
return (attachedCount == 1 && unattachedStartedCount == 0);
1230+
}
1231+
1232+
private static void printThreadInfo(Log log, IsolateThread thread) {
1233+
log.newline().zhex(thread).spaces(1).string(StatusSupport.getStatusString(thread));
1234+
1235+
int safepointBehavior = VMThreads.SafepointBehavior.getSafepointBehaviorVolatile(thread);
1236+
log.string(" (").string(VMThreads.SafepointBehavior.toString(safepointBehavior)).string(")");
1237+
1238+
Thread threadObj = PlatformThreads.fromVMThread(thread);
1239+
if (threadObj == null) {
1240+
log.string(" null");
1241+
} else {
1242+
log.string(" \"").string(threadObj.getName()).string("\" - ").zhex(Word.objectToUntrackedPointer(threadObj));
1243+
1244+
Thread.State status = threadObj.getState();
1245+
log.string(" (").string(status.name()).string(")");
1246+
1247+
if (threadObj.isDaemon()) {
1248+
log.string(", daemon");
1249+
}
1250+
if (JavaThreads.isInterrupted(threadObj)) {
1251+
log.string(", interrupted");
1252+
}
1253+
}
1254+
1255+
log.indent(true);
1256+
for (StackTraceElement e : threadObj.getStackTrace()) {
1257+
log.string(e.toString()).newline();
12331258
}
1234-
readyForTearDown = (attachedCount == 1 && unattachedStartedCount == 0);
1259+
log.redent(false);
12351260
}
12361261
}
12371262

0 commit comments

Comments
 (0)