Skip to content

Commit 4306c90

Browse files
committed
merge revision(s) 62430c1: [Backport #21342]
Properly unlock locked mutexes on thread cleanup. Mutexes were being improperly unlocked on thread cleanup. This bug was introduced in 050a895. We must keep a reference from the mutex to the thread, because if the fiber is collected before the mutex is, then we cannot unlink it from the thread in `mutex_free`. If it's not unlinked from the the thread when it's freed, it causes bugs in `rb_thread_unlock_all_locking_mutexes`. We now mark the fiber when a mutex is locked, and the thread is marked as well. However, a fiber can still be freed in the same GC cycle as the mutex, so the reference to the thread is still needed. The reason we need to mark the fiber is that `mutex_owned_p()` has an ABA issue where if the fiber is collected while it's locked, a new fiber could be allocated at the same memory address and we could get false positives. Fixes [Bug #21342] Co-authored-by: John Hawthorn <[email protected]>
1 parent 88a9614 commit 4306c90

File tree

4 files changed

+153
-17
lines changed

4 files changed

+153
-17
lines changed

test/ruby/test_thread.rb

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1557,4 +1557,97 @@ def test_pending_interrupt?
15571557
assert_equal(true, t.pending_interrupt?(Exception))
15581558
assert_equal(false, t.pending_interrupt?(ArgumentError))
15591559
end
1560+
1561+
def test_deadlock_backtrace
1562+
bug21127 = '[ruby-core:120930] [Bug #21127]'
1563+
1564+
expected_stderr = [
1565+
/-:12:in 'Thread#join': No live threads left. Deadlock\? \(fatal\)\n/,
1566+
/2 threads, 2 sleeps current:\w+ main thread:\w+\n/,
1567+
/\* #<Thread:\w+ sleep_forever>\n/,
1568+
:*,
1569+
/^\s*-:6:in 'Object#frame_for_deadlock_test_2'/,
1570+
:*,
1571+
/\* #<Thread:\w+ -:10 sleep_forever>\n/,
1572+
:*,
1573+
/^\s*-:2:in 'Object#frame_for_deadlock_test_1'/,
1574+
:*,
1575+
]
1576+
1577+
assert_in_out_err([], <<-INPUT, [], expected_stderr, bug21127)
1578+
def frame_for_deadlock_test_1
1579+
yield
1580+
end
1581+
1582+
def frame_for_deadlock_test_2
1583+
yield
1584+
end
1585+
1586+
q = Thread::Queue.new
1587+
t = Thread.new { frame_for_deadlock_test_1 { q.pop } }
1588+
1589+
frame_for_deadlock_test_2 { t.join }
1590+
INPUT
1591+
end
1592+
1593+
# [Bug #21342]
1594+
def test_unlock_locked_mutex_with_collected_fiber
1595+
bug21127 = '[ruby-core:120930] [Bug #21127]'
1596+
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
1597+
begin;
1598+
5.times do
1599+
m = Mutex.new
1600+
Thread.new do
1601+
m.synchronize do
1602+
end
1603+
end.join
1604+
Fiber.new do
1605+
GC.start
1606+
m.lock
1607+
end.resume
1608+
end
1609+
end;
1610+
end
1611+
1612+
def test_unlock_locked_mutex_with_collected_fiber2
1613+
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
1614+
begin;
1615+
MUTEXES = []
1616+
5.times do
1617+
m = Mutex.new
1618+
Fiber.new do
1619+
GC.start
1620+
m.lock
1621+
end.resume
1622+
MUTEXES << m
1623+
end
1624+
10.times do
1625+
MUTEXES.clear
1626+
GC.start
1627+
end
1628+
end;
1629+
end
1630+
1631+
def test_mutexes_locked_in_fiber_dont_have_aba_issue_with_new_fibers
1632+
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
1633+
begin;
1634+
mutexes = 1000.times.map do
1635+
Mutex.new
1636+
end
1637+
1638+
mutexes.map do |m|
1639+
Fiber.new do
1640+
m.lock
1641+
end.resume
1642+
end
1643+
1644+
GC.start
1645+
1646+
1000.times.map do
1647+
Fiber.new do
1648+
raise "FAILED!" if mutexes.any?(&:owned?)
1649+
end.resume
1650+
end
1651+
end;
1652+
end
15601653
end

thread.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ rb_threadptr_unlock_all_locking_mutexes(rb_thread_t *th)
445445
th->keeping_mutexes = mutex->next_mutex;
446446

447447
// rb_warn("mutex #<%p> was not unlocked by thread #<%p>", (void *)mutex, (void*)th);
448-
448+
VM_ASSERT(mutex->fiber);
449449
const char *error_message = rb_mutex_unlock_th(mutex, th, mutex->fiber);
450450
if (error_message) rb_bug("invalid keeping_mutexes: %s", error_message);
451451
}

thread_sync.c

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ static VALUE rb_eClosedQueueError;
88
/* Mutex */
99
typedef struct rb_mutex_struct {
1010
rb_fiber_t *fiber;
11+
VALUE thread; // even if the fiber is collected, we might need access to the thread in mutex_free
1112
struct rb_mutex_struct *next_mutex;
1213
struct ccan_list_head waitq; /* protected by GVL */
1314
} rb_mutex_t;
@@ -106,8 +107,6 @@ static const char* rb_mutex_unlock_th(rb_mutex_t *mutex, rb_thread_t *th, rb_fib
106107
*
107108
*/
108109

109-
#define mutex_mark ((void(*)(void*))0)
110-
111110
static size_t
112111
rb_mutex_num_waiting(rb_mutex_t *mutex)
113112
{
@@ -123,13 +122,39 @@ rb_mutex_num_waiting(rb_mutex_t *mutex)
123122

124123
rb_thread_t* rb_fiber_threadptr(const rb_fiber_t *fiber);
125124

125+
static bool
126+
locked_p(rb_mutex_t *mutex)
127+
{
128+
return mutex->fiber != 0;
129+
}
130+
131+
static void
132+
mutex_mark(void *ptr)
133+
{
134+
rb_mutex_t *mutex = ptr;
135+
VALUE fiber;
136+
if (locked_p(mutex)) {
137+
fiber = rb_fiberptr_self(mutex->fiber); // rb_fiber_t* doesn't move along with fiber object
138+
if (fiber) rb_gc_mark_movable(fiber);
139+
rb_gc_mark_movable(mutex->thread);
140+
}
141+
}
142+
143+
static void
144+
mutex_compact(void *ptr)
145+
{
146+
rb_mutex_t *mutex = ptr;
147+
if (locked_p(mutex)) {
148+
mutex->thread = rb_gc_location(mutex->thread);
149+
}
150+
}
151+
126152
static void
127153
mutex_free(void *ptr)
128154
{
129155
rb_mutex_t *mutex = ptr;
130-
if (mutex->fiber) {
131-
/* rb_warn("free locked mutex"); */
132-
const char *err = rb_mutex_unlock_th(mutex, rb_fiber_threadptr(mutex->fiber), mutex->fiber);
156+
if (locked_p(mutex)) {
157+
const char *err = rb_mutex_unlock_th(mutex, rb_thread_ptr(mutex->thread), mutex->fiber);
133158
if (err) rb_bug("%s", err);
134159
}
135160
ruby_xfree(ptr);
@@ -143,7 +168,7 @@ mutex_memsize(const void *ptr)
143168

144169
static const rb_data_type_t mutex_data_type = {
145170
"mutex",
146-
{mutex_mark, mutex_free, mutex_memsize,},
171+
{mutex_mark, mutex_free, mutex_memsize, mutex_compact,},
147172
0, 0, RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
148173
};
149174

@@ -204,12 +229,13 @@ rb_mutex_locked_p(VALUE self)
204229
{
205230
rb_mutex_t *mutex = mutex_ptr(self);
206231

207-
return RBOOL(mutex->fiber);
232+
return RBOOL(locked_p(mutex));
208233
}
209234

210235
static void
211236
thread_mutex_insert(rb_thread_t *thread, rb_mutex_t *mutex)
212237
{
238+
RUBY_ASSERT(!mutex->next_mutex);
213239
if (thread->keeping_mutexes) {
214240
mutex->next_mutex = thread->keeping_mutexes;
215241
}
@@ -234,10 +260,24 @@ thread_mutex_remove(rb_thread_t *thread, rb_mutex_t *mutex)
234260
}
235261

236262
static void
237-
mutex_locked(rb_thread_t *th, VALUE self)
263+
mutex_set_owner(VALUE self, rb_thread_t *th, rb_fiber_t *fiber)
264+
{
265+
rb_mutex_t *mutex = mutex_ptr(self);
266+
267+
mutex->thread = th->self;
268+
mutex->fiber = fiber;
269+
RB_OBJ_WRITTEN(self, Qundef, th->self);
270+
if (fiber) {
271+
RB_OBJ_WRITTEN(self, Qundef, rb_fiberptr_self(fiber));
272+
}
273+
}
274+
275+
static void
276+
mutex_locked(rb_thread_t *th, rb_fiber_t *fiber, VALUE self)
238277
{
239278
rb_mutex_t *mutex = mutex_ptr(self);
240279

280+
mutex_set_owner(self, th, fiber);
241281
thread_mutex_insert(th, mutex);
242282
}
243283

@@ -258,9 +298,8 @@ rb_mutex_trylock(VALUE self)
258298

259299
rb_fiber_t *fiber = GET_EC()->fiber_ptr;
260300
rb_thread_t *th = GET_THREAD();
261-
mutex->fiber = fiber;
262301

263-
mutex_locked(th, self);
302+
mutex_locked(th, fiber, self);
264303
return Qtrue;
265304
}
266305
else {
@@ -328,7 +367,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
328367
rb_ensure(call_rb_fiber_scheduler_block, self, delete_from_waitq, (VALUE)&sync_waiter);
329368

330369
if (!mutex->fiber) {
331-
mutex->fiber = fiber;
370+
mutex_set_owner(self, th, fiber);
332371
}
333372
}
334373
else {
@@ -358,6 +397,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
358397
rb_ractor_sleeper_threads_inc(th->ractor);
359398
rb_check_deadlock(th->ractor);
360399

400+
RUBY_ASSERT(!th->locking_mutex);
361401
th->locking_mutex = self;
362402

363403
ccan_list_add_tail(&mutex->waitq, &sync_waiter.node);
@@ -368,7 +408,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
368408

369409
// unlocked by another thread while sleeping
370410
if (!mutex->fiber) {
371-
mutex->fiber = fiber;
411+
mutex_set_owner(self, th, fiber);
372412
}
373413

374414
rb_ractor_sleeper_threads_dec(th->ractor);
@@ -382,10 +422,13 @@ do_mutex_lock(VALUE self, int interruptible_p)
382422
if (interruptible_p) {
383423
/* release mutex before checking for interrupts...as interrupt checking
384424
* code might call rb_raise() */
385-
if (mutex->fiber == fiber) mutex->fiber = 0;
425+
if (mutex->fiber == fiber) {
426+
mutex->thread = Qfalse;
427+
mutex->fiber = NULL;
428+
}
386429
RUBY_VM_CHECK_INTS_BLOCKING(th->ec); /* may release mutex */
387430
if (!mutex->fiber) {
388-
mutex->fiber = fiber;
431+
mutex_set_owner(self, th, fiber);
389432
}
390433
}
391434
else {
@@ -404,7 +447,7 @@ do_mutex_lock(VALUE self, int interruptible_p)
404447
}
405448

406449
if (saved_ints) th->ec->interrupt_flag = saved_ints;
407-
if (mutex->fiber == fiber) mutex_locked(th, self);
450+
if (mutex->fiber == fiber) mutex_locked(th, fiber, self);
408451
}
409452

410453
RUBY_DEBUG_LOG("%p locked", mutex);

version.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
1212
#define RUBY_VERSION_TEENY 6
1313
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
14-
#define RUBY_PATCHLEVEL 55
14+
#define RUBY_PATCHLEVEL 56
1515

1616
#include "ruby/version.h"
1717
#include "ruby/internal/abi.h"

0 commit comments

Comments
 (0)