Skip to content

Commit 15b7a42

Browse files
author
William Kemper
committed
8373819: Genshen: Control thread can miss allocation failure notification (redux)
Reviewed-by: kdnilsen, ysr
1 parent 9a2592f commit 15b7a42

File tree

4 files changed

+78
-50
lines changed

4 files changed

+78
-50
lines changed

src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,12 @@ ShenandoahGenerationalControlThread::ShenandoahGenerationalControlThread() :
6161

6262
void ShenandoahGenerationalControlThread::run_service() {
6363

64+
// This is the only instance of request. It is important that request.generation
65+
// does not change between a concurrent cycle failure and the start of a degenerated
66+
// cycle. We initialize it with the young generation to handle the pathological case
67+
// where the very first cycle is degenerated (some tests exercise this path).
6468
ShenandoahGCRequest request;
69+
request.generation = _heap->young_generation();
6570
while (!should_terminate()) {
6671

6772
// Figure out if we have pending requests.
@@ -77,12 +82,10 @@ void ShenandoahGenerationalControlThread::run_service() {
7782

7883
// If the cycle was cancelled, continue the next iteration to deal with it. Otherwise,
7984
// if there was no other cycle requested, cleanup and wait for the next request.
80-
if (!_heap->cancelled_gc()) {
81-
MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag);
82-
if (_requested_gc_cause == GCCause::_no_gc) {
83-
set_gc_mode(ml, none);
84-
ml.wait();
85-
}
85+
MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag);
86+
if (_requested_gc_cause == GCCause::_no_gc) {
87+
set_gc_mode(ml, none);
88+
ml.wait();
8689
}
8790
}
8891

@@ -96,38 +99,47 @@ void ShenandoahGenerationalControlThread::stop_service() {
9699
log_debug(gc, thread)("Stopping control thread");
97100
MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag);
98101
_heap->cancel_gc(GCCause::_shenandoah_stop_vm);
99-
_requested_gc_cause = GCCause::_shenandoah_stop_vm;
100-
notify_cancellation(ml, GCCause::_shenandoah_stop_vm);
102+
notify_control_thread(ml, GCCause::_shenandoah_stop_vm);
101103
// We can't wait here because it may interfere with the active cycle's ability
102104
// to reach a safepoint (this runs on a java thread).
103105
}
104106

105107
void ShenandoahGenerationalControlThread::check_for_request(ShenandoahGCRequest& request) {
106108
// Hold the lock while we read request cause and generation
107109
MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag);
108-
if (_heap->cancelled_gc()) {
109-
// The previous request was cancelled. Either it was cancelled for an allocation
110-
// failure (degenerated cycle), or old marking was cancelled to run a young collection.
111-
// In either case, the correct generation for the next cycle can be determined by
112-
// the cancellation cause.
113-
request.cause = _heap->clear_cancellation(GCCause::_shenandoah_concurrent_gc);
114-
if (request.cause == GCCause::_shenandoah_concurrent_gc) {
110+
111+
log_debug(gc, thread)("cancelled cause: %s, requested cause: %s",
112+
GCCause::to_string(_heap->cancelled_cause()), GCCause::to_string(_requested_gc_cause));
113+
114+
request.cause = _requested_gc_cause;
115+
if (ShenandoahCollectorPolicy::is_allocation_failure(request.cause)) {
116+
if (_degen_point == ShenandoahGC::_degenerated_unset) {
115117
request.generation = _heap->young_generation();
118+
_degen_point = ShenandoahGC::_degenerated_outside_cycle;
119+
} else {
120+
assert(request.generation != nullptr, "Must know which generation to use for degenerated cycle");
116121
}
117122
} else {
118-
request.cause = _requested_gc_cause;
123+
if (request.cause == GCCause::_shenandoah_concurrent_gc) {
124+
// This is a regulator request. It is also possible that the regulator "canceled" an old mark,
125+
// so we can clear that here. This clear operation will only clear the cancellation if it is
126+
// a regulator request.
127+
_heap->clear_cancellation(GCCause::_shenandoah_concurrent_gc);
128+
}
119129
request.generation = _requested_generation;
120-
121-
// Only clear these if we made a request from them. In the case of a cancelled gc,
122-
// we do not want to inadvertently lose this pending request.
123-
_requested_gc_cause = GCCause::_no_gc;
124-
_requested_generation = nullptr;
125130
}
126131

132+
log_debug(gc, thread)("request.cause: %s, request.generation: %s",
133+
GCCause::to_string(request.cause), request.generation == nullptr ? "None" : request.generation->name());
134+
135+
_requested_gc_cause = GCCause::_no_gc;
136+
_requested_generation = nullptr;
137+
127138
if (request.cause == GCCause::_no_gc || request.cause == GCCause::_shenandoah_stop_vm) {
128139
return;
129140
}
130141

142+
assert(request.generation != nullptr, "request.generation cannot be null, cause is: %s", GCCause::to_string(request.cause));
131143
GCMode mode;
132144
if (ShenandoahCollectorPolicy::is_allocation_failure(request.cause)) {
133145
mode = prepare_for_allocation_failure_gc(request);
@@ -140,11 +152,9 @@ void ShenandoahGenerationalControlThread::check_for_request(ShenandoahGCRequest&
140152
}
141153

142154
ShenandoahGenerationalControlThread::GCMode ShenandoahGenerationalControlThread::prepare_for_allocation_failure_gc(ShenandoahGCRequest &request) {
143-
144-
if (_degen_point == ShenandoahGC::_degenerated_unset) {
145-
_degen_point = ShenandoahGC::_degenerated_outside_cycle;
146-
request.generation = _heap->young_generation();
147-
} else if (request.generation->is_old()) {
155+
// Important: not all paths update the request.generation. This is intentional.
156+
// A degenerated cycle must use the same generation carried over from the previous request.
157+
if (request.generation->is_old()) {
148158
// This means we degenerated during the young bootstrap for the old generation
149159
// cycle. The following degenerated cycle should therefore also be young.
150160
request.generation = _heap->young_generation();
@@ -588,6 +598,8 @@ bool ShenandoahGenerationalControlThread::check_cancellation_or_degen(Shenandoah
588598
if (ShenandoahCollectorPolicy::is_allocation_failure(_heap->cancelled_cause())) {
589599
assert(_degen_point == ShenandoahGC::_degenerated_unset,
590600
"Should not be set yet: %s", ShenandoahGC::degen_point_to_string(_degen_point));
601+
MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag);
602+
_requested_gc_cause = _heap->cancelled_cause();
591603
_degen_point = point;
592604
log_debug(gc, thread)("Cancellation detected:, reason: %s, degen point: %s",
593605
GCCause::to_string(_heap->cancelled_cause()),
@@ -633,9 +645,7 @@ void ShenandoahGenerationalControlThread::service_stw_degenerated_cycle(const Sh
633645

634646
void ShenandoahGenerationalControlThread::request_gc(GCCause::Cause cause) {
635647
if (ShenandoahCollectorPolicy::is_allocation_failure(cause)) {
636-
// GC should already be cancelled. Here we are just notifying the control thread to
637-
// wake up and handle the cancellation request, so we don't need to set _requested_gc_cause.
638-
notify_cancellation(cause);
648+
notify_control_thread(cause);
639649
} else if (ShenandoahCollectorPolicy::should_handle_requested_gc(cause)) {
640650
handle_requested_gc(cause);
641651
}
@@ -653,15 +663,16 @@ bool ShenandoahGenerationalControlThread::request_concurrent_gc(ShenandoahGenera
653663
MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag);
654664
if (gc_mode() == servicing_old) {
655665
if (!preempt_old_marking(generation)) {
656-
log_debug(gc, thread)("Cannot start young, old collection is not preemptible");
666+
// Global should be able to cause old collection to be abandoned
667+
log_debug(gc, thread)("Cannot start %s, old collection is not preemptible", generation->name());
657668
return false;
658669
}
659670

660671
// Cancel the old GC and wait for the control thread to start servicing the new request.
661672
log_info(gc)("Preempting old generation mark to allow %s GC", generation->name());
662673
while (gc_mode() == servicing_old) {
663674
ShenandoahHeap::heap()->cancel_gc(GCCause::_shenandoah_concurrent_gc);
664-
notify_cancellation(ml, GCCause::_shenandoah_concurrent_gc);
675+
notify_control_thread(ml, GCCause::_shenandoah_concurrent_gc, generation);
665676
ml.wait();
666677
}
667678
return true;
@@ -695,21 +706,34 @@ void ShenandoahGenerationalControlThread::notify_control_thread(GCCause::Cause c
695706

696707
void ShenandoahGenerationalControlThread::notify_control_thread(MonitorLocker& ml, GCCause::Cause cause, ShenandoahGeneration* generation) {
697708
assert(_control_lock.is_locked(), "Request lock must be held here");
698-
log_debug(gc, thread)("Notify control (%s): %s, %s", gc_mode_name(gc_mode()), GCCause::to_string(cause), generation->name());
699-
_requested_gc_cause = cause;
700-
_requested_generation = generation;
701-
ml.notify();
709+
if (ShenandoahCollectorPolicy::is_allocation_failure(_requested_gc_cause)) {
710+
// We have already observed a request to handle an allocation failure. We cannot allow
711+
// another request (System.gc or regulator) to subvert the degenerated cycle.
712+
log_debug(gc, thread)("Not overwriting gc cause %s with %s", GCCause::to_string(_requested_gc_cause), GCCause::to_string(cause));
713+
} else {
714+
log_debug(gc, thread)("Notify control (%s): %s, %s", gc_mode_name(gc_mode()), GCCause::to_string(cause), generation->name());
715+
_requested_gc_cause = cause;
716+
_requested_generation = generation;
717+
ml.notify();
718+
}
702719
}
703720

704-
void ShenandoahGenerationalControlThread::notify_cancellation(GCCause::Cause cause) {
721+
void ShenandoahGenerationalControlThread::notify_control_thread(GCCause::Cause cause) {
705722
MonitorLocker ml(&_control_lock, Mutex::_no_safepoint_check_flag);
706-
notify_cancellation(ml, cause);
723+
notify_control_thread(ml, cause);
707724
}
708725

709-
void ShenandoahGenerationalControlThread::notify_cancellation(MonitorLocker& ml, GCCause::Cause cause) {
710-
assert(_heap->cancelled_gc(), "GC should already be cancelled");
711-
log_debug(gc,thread)("Notify control (%s): %s", gc_mode_name(gc_mode()), GCCause::to_string(cause));
712-
ml.notify();
726+
void ShenandoahGenerationalControlThread::notify_control_thread(MonitorLocker& ml, GCCause::Cause cause) {
727+
assert(_control_lock.is_locked(), "Request lock must be held here");
728+
if (ShenandoahCollectorPolicy::is_allocation_failure(_requested_gc_cause)) {
729+
// We have already observed a request to handle an allocation failure. We cannot allow
730+
// another request (System.gc or regulator) to subvert the degenerated cycle.
731+
log_debug(gc, thread)("Not overwriting gc cause %s with %s", GCCause::to_string(_requested_gc_cause), GCCause::to_string(cause));
732+
} else {
733+
log_debug(gc, thread)("Notify control (%s): %s", gc_mode_name(gc_mode()), GCCause::to_string(cause));
734+
_requested_gc_cause = cause;
735+
ml.notify();
736+
}
713737
}
714738

715739
bool ShenandoahGenerationalControlThread::preempt_old_marking(ShenandoahGeneration* generation) {

src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.hpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,13 @@ class ShenandoahGenerationalControlThread: public ShenandoahController {
135135
// Return printable name for the given gc mode.
136136
static const char* gc_mode_name(GCMode mode);
137137

138-
// Takes the request lock and updates the requested cause and generation, then notifies the control thread.
139-
// The overloaded variant should be used when the _control_lock is already held.
138+
// These notify the control thread after updating _requested_gc_cause and (optionally) _requested_generation.
139+
// Updating the requested generation is not necessary for allocation failures nor when stopping the thread.
140+
void notify_control_thread(GCCause::Cause cause);
141+
void notify_control_thread(MonitorLocker& ml, GCCause::Cause cause);
140142
void notify_control_thread(GCCause::Cause cause, ShenandoahGeneration* generation);
141143
void notify_control_thread(MonitorLocker& ml, GCCause::Cause cause, ShenandoahGeneration* generation);
142144

143-
// Notifies the control thread, but does not update the requested cause or generation.
144-
// The overloaded variant should be used when the _control_lock is already held.
145-
void notify_cancellation(GCCause::Cause cause);
146-
void notify_cancellation(MonitorLocker& ml, GCCause::Cause cause);
147-
148145
// Configure the heap to age objects and regions if the aging period has elapsed.
149146
void maybe_set_aging_cycle();
150147

src/hotspot/share/gc/shenandoah/shenandoahGenerationalHeap.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@ class ShenandoahGenerationalHeap : public ShenandoahHeap {
4444
void post_initialize_heuristics() override;
4545

4646
static ShenandoahGenerationalHeap* heap() {
47-
assert(ShenandoahCardBarrier, "Should have card barrier to use genenrational heap");
47+
assert(ShenandoahCardBarrier, "Should have card barrier to use generational heap");
4848
CollectedHeap* heap = Universe::heap();
4949
return cast(heap);
5050
}
5151

5252
static ShenandoahGenerationalHeap* cast(CollectedHeap* heap) {
53-
assert(ShenandoahCardBarrier, "Should have card barrier to use genenrational heap");
53+
assert(ShenandoahCardBarrier, "Should have card barrier to use generational heap");
5454
return checked_cast<ShenandoahGenerationalHeap*>(heap);
5555
}
5656

src/hotspot/share/gc/shenandoah/shenandoahRegulatorThread.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ bool ShenandoahRegulatorThread::start_global_cycle() const {
149149

150150
bool ShenandoahRegulatorThread::request_concurrent_gc(ShenandoahGeneration* generation) const {
151151
double now = os::elapsedTime();
152+
153+
// This call may find the control thread waiting on workers which have suspended
154+
// to allow a safepoint to run. If this regulator thread does not yield, the safepoint
155+
// will not run. The worker threads won't progress, the control thread won't progress,
156+
// and the regulator thread may never yield. Therefore, we leave the suspendible
157+
// thread set before making this call.
158+
SuspendibleThreadSetLeaver leaver;
152159
bool accepted = _control_thread->request_concurrent_gc(generation);
153160
if (LogTarget(Debug, gc, thread)::is_enabled() && accepted) {
154161
double wait_time = os::elapsedTime() - now;

0 commit comments

Comments
 (0)