Skip to content

Commit 62430c1

Browse files
Properly unlock locked mutexes on thread cleanup.
Mutexes were being improperly unlocked on thread cleanup. This bug was introduced in 050a895. We must keep a reference from the mutex to the thread, because if the fiber is collected before the mutex is, then we cannot unlink it from the thread in `mutex_free`. If it's not unlinked from the the thread when it's freed, it causes bugs in `rb_thread_unlock_all_locking_mutexes`. We now mark the fiber when a mutex is locked, and the thread is marked as well. However, a fiber can still be freed in the same GC cycle as the mutex, so the reference to the thread is still needed. The reason we need to mark the fiber is that `mutex_owned_p()` has an ABA issue where if the fiber is collected while it's locked, a new fiber could be allocated at the same memory address and we could get false positives. Fixes [Bug #21342] Co-authored-by: John Hawthorn <[email protected]>
1 parent 50393d1 commit 62430c1

File tree

3 files changed

+120
-16
lines changed

3 files changed

+120
-16
lines changed

test/ruby/test_thread.rb

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,4 +1589,65 @@ def frame_for_deadlock_test_2
15891589
frame_for_deadlock_test_2 { t.join }
15901590
INPUT
15911591
end
1592+
1593+
# [Bug #21342]
1594+
def test_unlock_locked_mutex_with_collected_fiber
1595+
bug21127 = '[ruby-core:120930] [Bug #21127]'
1596+
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
1597+
begin;
1598+
5.times do
1599+
m = Mutex.new
1600+
Thread.new do
1601+
m.synchronize do
1602+
end
1603+
end.join
1604+
Fiber.new do
1605+
GC.start
1606+
m.lock
1607+
end.resume
1608+
end
1609+
end;
1610+
end
1611+
1612+
def test_unlock_locked_mutex_with_collected_fiber2
1613+
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
1614+
begin;
1615+
MUTEXES = []
1616+
5.times do
1617+
m = Mutex.new
1618+
Fiber.new do
1619+
GC.start
1620+
m.lock
1621+
end.resume
1622+
MUTEXES << m
1623+
end
1624+
10.times do
1625+
MUTEXES.clear
1626+
GC.start
1627+
end
1628+
end;
1629+
end
1630+
1631+
def test_mutexes_locked_in_fiber_dont_have_aba_issue_with_new_fibers
1632+
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
1633+
begin;
1634+
mutexes = 1000.times.map do
1635+
Mutex.new
1636+
end
1637+
1638+
mutexes.map do |m|
1639+
Fiber.new do
1640+
m.lock
1641+
end.resume
1642+
end
1643+
1644+
GC.start
1645+
1646+
1000.times.map do
1647+
Fiber.new do
1648+
raise "FAILED!" if mutexes.any?(&:owned?)
1649+
end.resume
1650+
end
1651+
end;
1652+
end
15921653
end

thread.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ rb_threadptr_unlock_all_locking_mutexes(rb_thread_t *th)
442442
th->keeping_mutexes = mutex->next_mutex;
443443

444444
// rb_warn("mutex #<%p> was not unlocked by thread #<%p>", (void *)mutex, (void*)th);
445-
445+
VM_ASSERT(mutex->fiber);
446446
const char *error_message = rb_mutex_unlock_th(mutex, th, mutex->fiber);
447447
if (error_message) rb_bug("invalid keeping_mutexes: %s", error_message);
448448
}

thread_sync.c

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ static VALUE rb_eClosedQueueError;
88
/* Mutex */
99
typedef struct rb_mutex_struct {
1010
rb_fiber_t *fiber;
11+
VALUE thread; // even if the fiber is collected, we might need access to the thread in mutex_free
1112
struct rb_mutex_struct *next_mutex;
1213
struct ccan_list_head waitq; /* protected by GVL */
1314
} rb_mutex_t;
@@ -106,8 +107,6 @@ static const char* rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_fib
106107
*
107108
*/
108109

109-
#define mutex_mark ((void(*)(void*))0)
110-
111110
static size_t
112111
rb_mutex_num_waiting(rb_mutex_t *mutex)
113112
{
@@ -123,13 +122,39 @@ rb_mutex_num_waiting(rb_mutex_t *mutex)
123122

124123
rb_thread_t* rb_fiber_threadptr(const rb_fiber_t *fiber);
125124

125+
static bool
126+
locked_p(rb_mutex_t *mutex)
127+
{
128+
return mutex->fiber != 0;
129+
}
130+
131+
static void
132+
mutex_mark(void *ptr)
133+
{
134+
rb_mutex_t *mutex = ptr;
135+
VALUE fiber;
136+
if (locked_p(mutex)) {
137+
fiber = rb_fiberptr_self(mutex->fiber); // rb_fiber_t* doesn't move along with fiber object
138+
if (fiber) rb_gc_mark_movable(fiber);
139+
rb_gc_mark_movable(mutex->thread);
140+
}
141+
}
142+
143+
static void
144+
mutex_compact(void *ptr)
145+
{
146+
rb_mutex_t *mutex = ptr;
147+
if (locked_p(mutex)) {
148+
mutex->thread = rb_gc_location(mutex->thread);
149+
}
150+
}
151+
126152
static void
127153
mutex_free(void *ptr)
128154
{
129155
rb_mutex_t *mutex = ptr;
130-
if (mutex->fiber) {
131-
/* rb_warn("free locked mutex"); */
132-
const char *err = rb_mutex_unlock_th(mutex, rb_fiber_threadptr(mutex->fiber), mutex->fiber);
156+
if (locked_p(mutex)) {
157+
const char *err = rb_mutex_unlock_th(mutex, rb_thread_ptr(mutex->thread), mutex->fiber);
133158
if (err) rb_bug("%s", err);
134159
}
135160
ruby_xfree(ptr);
@@ -143,7 +168,7 @@ mutex_memsize(const void *ptr)
143168

144169
static const rb_data_type_t mutex_data_type = {
145170
"mutex",
146-
{mutex_mark, mutex_free, mutex_memsize,},
171+
{mutex_mark, mutex_free, mutex_memsize, mutex_compact,},
147172
0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
148173
};
149174

@@ -204,12 +229,13 @@ rb_mutex_locked_p(VALUE self)
204229
{
205230
rb_mutex_t *mutex = mutex_ptr(self);
206231

207-
return RBOOL(mutex->fiber);
232+
return RBOOL(locked_p(mutex));
208233
}
209234

210235
static void
211236
thread_mutex_insert(rb_thread_t *thread, rb_mutex_t *mutex)
212237
{
238+
RUBY_ASSERT(!mutex->next_mutex);
213239
if (thread->keeping_mutexes) {
214240
mutex->next_mutex = thread->keeping_mutexes;
215241
}
@@ -234,10 +260,24 @@ thread_mutex_remove(rb_thread_t *thread, rb_mutex_t *mutex)
234260
}
235261

236262
static void
237-
mutex_locked(rb_thread_t *th, VALUE self)
263+
mutex_set_owner(VALUE self, rb_thread_t *th, rb_fiber_t *fiber)
264+
{
265+
rb_mutex_t *mutex = mutex_ptr(self);
266+
267+
mutex->thread = th->self;
268+
mutex->fiber = fiber;
269+
RB_OBJ_WRITTEN(self, Qundef, th->self);
270+
if (fiber) {
271+
RB_OBJ_WRITTEN(self, Qundef, rb_fiberptr_self(fiber));
272+
}
273+
}
274+
275+
static void
276+
mutex_locked(rb_thread_t *th, rb_fiber_t *fiber, VALUE self)
238277
{
239278
rb_mutex_t *mutex = mutex_ptr(self);
240279

280+
mutex_set_owner(self, th, fiber);
241281
thread_mutex_insert(th, mutex);
242282
}
243283

@@ -258,9 +298,8 @@ rb_mutex_trylock(VALUE self)
258298

259299
rb_fiber_t *fiber = GET_EC()->fiber_ptr;
260300
rb_thread_t *th = GET_THREAD();
261-
mutex->fiber = fiber;
262301

263-
mutex_locked(th, self);
302+
mutex_locked(th, fiber, self);
264303
return Qtrue;
265304
}
266305
else {
@@ -328,7 +367,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
328367
rb_ensure(call_rb_fiber_scheduler_block, self, delete_from_waitq, (VALUE)&sync_waiter);
329368

330369
if (!mutex->fiber) {
331-
mutex->fiber = fiber;
370+
mutex_set_owner(self, th, fiber);
332371
}
333372
}
334373
else {
@@ -358,6 +397,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
358397
rb_ractor_sleeper_threads_inc(th->ractor);
359398
rb_check_deadlock(th->ractor);
360399

400+
RUBY_ASSERT(!th->locking_mutex);
361401
th->locking_mutex = self;
362402

363403
ccan_list_add_tail(&mutex->waitq, &sync_waiter.node);
@@ -368,7 +408,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
368408

369409
// unlocked by another thread while sleeping
370410
if (!mutex->fiber) {
371-
mutex->fiber = fiber;
411+
mutex_set_owner(self, th, fiber);
372412
}
373413

374414
rb_ractor_sleeper_threads_dec(th->ractor);
@@ -381,10 +421,13 @@ do_mutex_lock(VALUE self, int interruptible_p)
381421
if (interruptible_p) {
382422
/* release mutex before checking for interrupts...as interrupt checking
383423
* code might call rb_raise() */
384-
if (mutex->fiber == fiber) mutex->fiber = 0;
424+
if (mutex->fiber == fiber) {
425+
mutex->thread = Qfalse;
426+
mutex->fiber = NULL;
427+
}
385428
RUBY_VM_CHECK_INTS_BLOCKING(th->ec); /* may release mutex */
386429
if (!mutex->fiber) {
387-
mutex->fiber = fiber;
430+
mutex_set_owner(self, th, fiber);
388431
}
389432
}
390433
else {
@@ -403,7 +446,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
403446
}
404447

405448
if (saved_ints) th->ec->interrupt_flag = saved_ints;
406-
if (mutex->fiber == fiber) mutex_locked(th, self);
449+
if (mutex->fiber == fiber) mutex_locked(th, fiber, self);
407450
}
408451

409452
RUBY_DEBUG_LOG("%p locked", mutex);

0 commit comments

Comments
 (0)