Skip to content

Commit 5922c3c

Browse files
committed
8371990: Remove two second delayed OOME after GC shutdown
Reviewed-by: kbarrett, tschatzl, ayang, iwalulya
1 parent df24b90 commit 5922c3c

File tree

14 files changed

+78
-98
lines changed

14 files changed

+78
-98
lines changed

src/hotspot/share/gc/g1/g1CollectedHeap.cpp

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -478,11 +478,6 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(uint node_index, size_t word_
478478
log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating %zu words",
479479
Thread::current()->name(), word_size);
480480

481-
if (is_shutting_down()) {
482-
stall_for_vm_shutdown();
483-
return nullptr;
484-
}
485-
486481
// Has the gc overhead limit been reached in the meantime? If so, this mutator
487482
// should receive null even when unsuccessfully scheduling a collection as well
488483
// for global consistency.
@@ -738,11 +733,6 @@ HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size) {
738733
log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating %zu",
739734
Thread::current()->name(), word_size);
740735

741-
if (is_shutting_down()) {
742-
stall_for_vm_shutdown();
743-
return nullptr;
744-
}
745-
746736
// Has the gc overhead limit been reached in the meantime? If so, this mutator
747737
// should receive null even when unsuccessfully scheduling a collection as well
748738
// for global consistency.
@@ -1645,6 +1635,10 @@ jint G1CollectedHeap::initialize() {
16451635
return JNI_OK;
16461636
}
16471637

1638+
bool G1CollectedHeap::concurrent_mark_is_terminating() const {
1639+
return _cm_thread->should_terminate();
1640+
}
1641+
16481642
void G1CollectedHeap::stop() {
16491643
// Stop all concurrent threads. We do this to make sure these threads
16501644
// do not continue to execute and access resources (e.g. logging)
@@ -1965,8 +1959,8 @@ bool G1CollectedHeap::try_collect_concurrently(size_t allocation_word_size,
19651959
}
19661960

19671961
// If VMOp skipped initiating concurrent marking cycle because
1968-
// we're terminating, then we're done.
1969-
if (is_shutting_down()) {
1962+
// we're shutting down, then we're done.
1963+
if (op.is_shutting_down()) {
19701964
LOG_COLLECT_CONCURRENTLY(cause, "skipped: terminating");
19711965
return false;
19721966
}

src/hotspot/share/gc/g1/g1CollectedHeap.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,9 @@ class G1CollectedHeap : public CollectedHeap {
917917
// specified by the policy object.
918918
jint initialize() override;
919919

920+
// Returns whether concurrent mark threads (and the VM) are about to terminate.
921+
bool concurrent_mark_is_terminating() const;
922+
920923
void safepoint_synchronize_begin() override;
921924
void safepoint_synchronize_end() override;
922925

src/hotspot/share/gc/g1/g1ConcurrentMark.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1883,7 +1883,7 @@ bool G1ConcurrentMark::concurrent_cycle_abort() {
18831883
// nothing, but this situation should be extremely rare (a full gc after shutdown
18841884
// has been signalled is already rare), and this work should be negligible compared
18851885
// to actual full gc work.
1886-
if (!cm_thread()->in_progress() && !_g1h->is_shutting_down()) {
1886+
if (!cm_thread()->in_progress() && !_g1h->concurrent_mark_is_terminating()) {
18871887
return false;
18881888
}
18891889

src/hotspot/share/gc/g1/g1Policy.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,6 @@ bool G1Policy::should_retain_evac_failed_region(uint index) const {
669669
}
670670

671671
void G1Policy::record_pause_start_time() {
672-
assert(!_g1h->is_shutting_down(), "Invariant!");
673672
Ticks now = Ticks::now();
674673
_cur_pause_start_sec = now.seconds();
675674

@@ -1280,12 +1279,6 @@ void G1Policy::decide_on_concurrent_start_pause() {
12801279
// concurrent start pause).
12811280
assert(!collector_state()->in_concurrent_start_gc(), "pre-condition");
12821281

1283-
// We should not be starting a concurrent start pause if the concurrent mark
1284-
// thread is terminating.
1285-
if (_g1h->is_shutting_down()) {
1286-
return;
1287-
}
1288-
12891282
if (collector_state()->initiate_conc_mark_if_possible()) {
12901283
// We had noticed on a previous pause that the heap occupancy has
12911284
// gone over the initiating threshold and we should start a

src/hotspot/share/gc/g1/g1RemSet.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,14 +1018,8 @@ class G1MergeHeapRootsTask : public WorkerTask {
10181018
// the pause occurs during the Concurrent Cleanup for Next Mark phase.
10191019
// Only at that point the region's bitmap may contain marks while being in the collection
10201020
// set at the same time.
1021-
//
1022-
// There is one exception: shutdown might have aborted the Concurrent Cleanup for Next
1023-
// Mark phase midway, which might have also left stale marks in old generation regions.
1024-
// There might actually have been scheduled multiple collections, but at that point we do
1025-
// not care that much about performance and just do the work multiple times if needed.
1026-
return (_g1h->collector_state()->clear_bitmap_in_progress() ||
1027-
_g1h->is_shutting_down()) &&
1028-
hr->is_old();
1021+
return _g1h->collector_state()->clear_bitmap_in_progress() &&
1022+
hr->is_old();
10291023
}
10301024

10311025
public:

src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -344,11 +344,6 @@ HeapWord* ParallelScavengeHeap::mem_allocate_work(size_t size, bool is_tlab) {
344344
assert(is_in_or_null(op.result()), "result not in heap");
345345
return op.result();
346346
}
347-
348-
if (is_shutting_down()) {
349-
stall_for_vm_shutdown();
350-
return nullptr;
351-
}
352347
}
353348

354349
// Was the gc-overhead reached inside the safepoint? If so, this mutator

src/hotspot/share/gc/serial/serialHeap.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -337,11 +337,6 @@ HeapWord* SerialHeap::mem_allocate_work(size_t size, bool is_tlab) {
337337
break;
338338
}
339339

340-
if (is_shutting_down()) {
341-
stall_for_vm_shutdown();
342-
return nullptr;
343-
}
344-
345340
// Give a warning if we seem to be looping forever.
346341
if ((QueuedAllocationWarningCount > 0) &&
347342
(try_count % QueuedAllocationWarningCount == 0)) {

src/hotspot/share/gc/shared/collectedHeap.cpp

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@
6262

6363
class ClassLoaderData;
6464

65+
bool CollectedHeap::_is_shutting_down = false;
66+
6567
size_t CollectedHeap::_lab_alignment_reserve = SIZE_MAX;
6668
Klass* CollectedHeap::_filler_object_klass = nullptr;
6769
size_t CollectedHeap::_filler_array_max_size = 0;
@@ -386,11 +388,6 @@ MetaWord* CollectedHeap::satisfy_failed_metadata_allocation(ClassLoaderData* loa
386388
return op.result();
387389
}
388390

389-
if (is_shutting_down()) {
390-
stall_for_vm_shutdown();
391-
return nullptr;
392-
}
393-
394391
loop_count++;
395392
if ((QueuedAllocationWarningCount > 0) &&
396393
(loop_count % QueuedAllocationWarningCount == 0)) {
@@ -605,30 +602,20 @@ void CollectedHeap::post_initialize() {
605602
initialize_serviceability();
606603
}
607604

608-
bool CollectedHeap::is_shutting_down() const {
609-
return Universe::is_shutting_down();
605+
bool CollectedHeap::is_shutting_down() {
606+
assert(Heap_lock->owned_by_self(), "Protected by this lock");
607+
return _is_shutting_down;
610608
}
611609

612-
void CollectedHeap::stall_for_vm_shutdown() {
613-
assert(is_shutting_down(), "Precondition");
614-
// Stall the thread (2 seconds) instead of an indefinite wait to avoid deadlock
615-
// if the VM shutdown triggers a GC.
616-
// The 2-seconds sleep is:
617-
// - long enough to keep daemon threads stalled, while the shutdown
618-
// sequence completes in the common case.
619-
// - short enough to avoid excessive stall time if the shutdown itself
620-
// triggers a GC.
621-
JavaThread::current()->sleep(2 * MILLIUNITS);
622-
623-
ResourceMark rm;
624-
log_warning(gc, alloc)("%s: Stall for VM-Shutdown timed out; allocation may fail with OOME", Thread::current()->name());
625-
}
610+
void CollectedHeap::initiate_shutdown() {
611+
{
612+
// Acquire the Heap_lock to synchronize with VM_Heap_Sync_Operations,
613+
// which may depend on the value of _is_shutting_down flag.
614+
MutexLocker hl(Heap_lock);
615+
_is_shutting_down = true;
616+
}
626617

627-
void CollectedHeap::before_exit() {
628618
print_tracing_info();
629-
630-
// Stop any on-going concurrent work and prepare for exit.
631-
stop();
632619
}
633620

634621
size_t CollectedHeap::bootstrap_max_memory() const {

src/hotspot/share/gc/shared/collectedHeap.hpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ class CollectedHeap : public CHeapObj<mtGC> {
9696
friend class MemAllocator;
9797

9898
private:
99+
static bool _is_shutting_down;
100+
99101
GCHeapLog* _heap_log;
100102
GCMetaspaceLog* _metaspace_log;
101103

@@ -209,11 +211,10 @@ class CollectedHeap : public CHeapObj<mtGC> {
209211
// Default implementation does nothing.
210212
virtual void print_tracing_info() const = 0;
211213

214+
public:
212215
// Stop any onging concurrent work and prepare for exit.
213216
virtual void stop() = 0;
214217

215-
public:
216-
217218
static inline size_t filler_array_max_size() {
218219
return _filler_array_max_size;
219220
}
@@ -245,14 +246,9 @@ class CollectedHeap : public CHeapObj<mtGC> {
245246
// This is the correct place to place such initialization methods.
246247
virtual void post_initialize();
247248

248-
bool is_shutting_down() const;
249-
250-
// If the VM is shutting down, we may have skipped VM_CollectForAllocation.
251-
// In this case, stall the allocation request briefly in the hope that
252-
// the VM shutdown completes before the allocation request returns.
253-
void stall_for_vm_shutdown();
249+
static bool is_shutting_down();
254250

255-
void before_exit();
251+
void initiate_shutdown();
256252

257253
// Stop and resume concurrent GC threads interfering with safepoint operations
258254
virtual void safepoint_synchronize_begin() {}

src/hotspot/share/gc/shared/gcVMOperations.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,22 @@ static bool should_use_gclocker() {
9292
return UseSerialGC || UseParallelGC;
9393
}
9494

95+
static void block_if_java_thread() {
96+
Thread* thread = Thread::current();
97+
if (thread->is_Java_thread()) {
98+
// Block here and allow the shutdown to complete
99+
while (true) {
100+
// The call to wait has a few important effects:
101+
// 1) Block forever (minus spurious wake-ups, hence the loop)
102+
// 2) Release the Heap_lock, which is taken by the shutdown code
103+
// 3) Transition to blocked state so that the final VM_Exit operation can be scheduled
104+
Heap_lock->wait();
105+
}
106+
} else {
107+
assert(thread->is_ConcurrentGC_thread(), "Unexpected thread type");
108+
}
109+
}
110+
95111
bool VM_GC_Operation::doit_prologue() {
96112
assert(_gc_cause != GCCause::_no_gc, "Illegal GCCause");
97113

@@ -110,8 +126,15 @@ bool VM_GC_Operation::doit_prologue() {
110126
}
111127
VM_Heap_Sync_Operation::doit_prologue();
112128

129+
_is_shutting_down = CollectedHeap::is_shutting_down();
130+
if (_is_shutting_down) {
131+
// Block forever if a Java thread is triggering a GC after
132+
// the GC has started to shut down.
133+
block_if_java_thread();
134+
}
135+
113136
// Check invocations
114-
if (skip_operation() || Universe::is_shutting_down()) {
137+
if (skip_operation() || _is_shutting_down) {
115138
// skip collection
116139
Heap_lock->unlock();
117140
if (should_use_gclocker()) {

0 commit comments

Comments
 (0)