Skip to content

Commit fca258f

Browse files
committed
Fix deadlock when malloc in Ractor lock
If we malloc when the current Ractor is locked, we can deadlock because GC requires VM lock and Ractor barrier. If another Ractor is waiting on this Ractor lock, then it will deadlock because the other Ractor will never join the barrier. For example, this script deadlocks: r = Ractor.new do loop do Ractor::Port.new end end 100000.times do |i| r.send(nil) puts i end On debug builds, it fails with this assertion error: vm_sync.c:75: Assertion Failed: vm_lock_enter:cr->sync.locked_by != rb_ractor_self(cr) On non-debug builds, we can see that it deadlocks in the debugger: Main Ractor: frame #3: 0x000000010021fdc4 miniruby`rb_native_mutex_lock(lock=<unavailable>) at thread_pthread.c:115:14 frame Shopify#4: 0x0000000100193eb8 miniruby`ractor_send0 [inlined] ractor_lock(r=<unavailable>, file=<unavailable>, line=1180) at ractor.c:73:5 frame Shopify#5: 0x0000000100193eb0 miniruby`ractor_send0 [inlined] ractor_send_basket(ec=<unavailable>, rp=0x0000000131092840, b=0x000000011c63de80, raise_on_error=true) at ractor_sync.c:1180:5 frame Shopify#6: 0x0000000100193eac miniruby`ractor_send0(ec=<unavailable>, rp=0x0000000131092840, obj=4, move=<unavailable>, raise_on_error=true) at ractor_sync.c:1211:5 Second Ractor: frame #2: 0x00000001002208d0 miniruby`rb_ractor_sched_barrier_start [inlined] rb_native_cond_wait(cond=<unavailable>, mutex=<unavailable>) at thread_pthread.c:221:13 frame #3: 0x00000001002208cc miniruby`rb_ractor_sched_barrier_start(vm=0x000000013180d600, cr=0x0000000131093460) at thread_pthread.c:1438:13 frame Shopify#4: 0x000000010028a328 miniruby`rb_vm_barrier at vm_sync.c:262:13 [artificial] frame Shopify#5: 0x00000001000dfa6c miniruby`gc_start [inlined] rb_gc_vm_barrier at gc.c:179:5 frame Shopify#6: 0x00000001000dfa68 miniruby`gc_start [inlined] gc_enter(objspace=0x000000013180fc00, event=gc_enter_event_start, lock_lev=<unavailable>) at default.c:6636:9 frame Shopify#7: 0x00000001000dfa48 miniruby`gc_start(objspace=0x000000013180fc00, reason=<unavailable>) at default.c:6361:5 frame Shopify#8: 0x00000001000e3fd8 miniruby`objspace_malloc_increase_body [inlined] garbage_collect(objspace=0x000000013180fc00, reason=512) at default.c:6341:15 frame Shopify#9: 0x00000001000e3fa4 miniruby`objspace_malloc_increase_body [inlined] garbage_collect_with_gvl(objspace=0x000000013180fc00, reason=512) at default.c:6741:16 frame Shopify#10: 0x00000001000e3f88 miniruby`objspace_malloc_increase_body(objspace=0x000000013180fc00, mem=<unavailable>, new_size=<unavailable>, old_size=<unavailable>, type=<unavailable>) at default.c:8007:13 frame Shopify#11: 0x00000001000e3c44 miniruby`rb_gc_impl_malloc [inlined] objspace_malloc_fixup(objspace=0x000000013180fc00, mem=0x000000011c700000, size=12582912) at default.c:8085:5 frame Shopify#12: 0x00000001000e3c30 miniruby`rb_gc_impl_malloc(objspace_ptr=0x000000013180fc00, size=12582912) at default.c:8182:12 frame Shopify#13: 0x00000001000d4584 miniruby`ruby_xmalloc [inlined] ruby_xmalloc_body(size=<unavailable>) at gc.c:5128:12 frame Shopify#14: 0x00000001000d4568 miniruby`ruby_xmalloc(size=<unavailable>) at gc.c:5118:34 frame Shopify#15: 0x00000001001eb184 miniruby`rb_st_init_existing_table_with_size(tab=0x000000011c2b4b40, type=<unavailable>, size=<unavailable>) at st.c:559:39 frame Shopify#16: 0x00000001001ebc74 miniruby`rebuild_table_if_necessary [inlined] rb_st_init_table_with_size(type=0x00000001004f4a78, size=524287) at st.c:585:5 frame Shopify#17: 0x00000001001ebc5c miniruby`rebuild_table_if_necessary [inlined] rebuild_table(tab=0x000000013108e2f0) at st.c:753:19 frame Shopify#18: 0x00000001001ebbfc miniruby`rebuild_table_if_necessary(tab=0x000000013108e2f0) at st.c:1125:9 frame Shopify#19: 0x00000001001eba08 miniruby`rb_st_insert(tab=0x000000013108e2f0, key=262144, value=4767566624) at st.c:1143:5 frame Shopify#20: 0x0000000100194b84 miniruby`ractor_port_initialzie [inlined] ractor_add_port(r=0x0000000131093460, id=262144) at ractor_sync.c:399:9 frame Shopify#21: 0x0000000100194b58 miniruby`ractor_port_initialzie [inlined] ractor_port_init(rpv=4750065560, r=0x0000000131093460) at ractor_sync.c:87:5 frame Shopify#22: 0x0000000100194b34 miniruby`ractor_port_initialzie(self=4750065560) at ractor_sync.c:103:12
1 parent 9bc53dc commit fca258f

File tree

7 files changed

+66
-30
lines changed

7 files changed

+66
-30
lines changed

bootstraptest/test_ractor.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,3 +2372,26 @@ def call_test(obj)
23722372
ractors.each(&:join)
23732373
:ok
23742374
RUBY
2375+
2376+
# This test checks that we do not trigger a GC when we have malloc with Ractor
2377+
# locks. We cannot trigger a GC with Ractor locks because GC requires VM lock
2378+
# and Ractor barrier. If another Ractor is waiting on this Ractor lock, then it
2379+
# will deadlock because the other Ractor will never join the barrier.
2380+
#
2381+
# Creating Ractor::Port requires locking the Ractor and inserting into an
2382+
# st_table, which can call malloc.
2383+
assert_equal 'ok', <<~'RUBY'
2384+
r = Ractor.new do
2385+
loop do
2386+
Ractor::Port.new
2387+
end
2388+
end
2389+
2390+
10.times do
2391+
10_000.times do
2392+
r.send(nil)
2393+
end
2394+
sleep(0.01)
2395+
end
2396+
:ok
2397+
RUBY

gc.c

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -625,9 +625,9 @@ typedef struct gc_function_map {
625625
size_t (*heap_id_for_size)(void *objspace_ptr, size_t size);
626626
bool (*size_allocatable_p)(size_t size);
627627
// Malloc
628-
void *(*malloc)(void *objspace_ptr, size_t size);
629-
void *(*calloc)(void *objspace_ptr, size_t size);
630-
void *(*realloc)(void *objspace_ptr, void *ptr, size_t new_size, size_t old_size);
628+
void *(*malloc)(void *objspace_ptr, size_t size, bool gc_allowed);
629+
void *(*calloc)(void *objspace_ptr, size_t size, bool gc_allowed);
630+
void *(*realloc)(void *objspace_ptr, void *ptr, size_t new_size, size_t old_size, bool gc_allowed);
631631
void (*free)(void *objspace_ptr, void *ptr, size_t old_size);
632632
void (*adjust_memory_usage)(void *objspace_ptr, ssize_t diff);
633633
// Marking
@@ -5118,14 +5118,22 @@ ruby_xmalloc(size_t size)
51185118
return handle_malloc_failure(ruby_xmalloc_body(size));
51195119
}
51205120

5121+
static bool
5122+
malloc_gc_allowed(void)
5123+
{
5124+
rb_ractor_t *r = rb_current_ractor_raw(false);
5125+
5126+
return r == NULL || !r->malloc_gc_disabled;
5127+
}
5128+
51215129
static void *
51225130
ruby_xmalloc_body(size_t size)
51235131
{
51245132
if ((ssize_t)size < 0) {
51255133
negative_size_allocation_error("too large allocation size");
51265134
}
51275135

5128-
return rb_gc_impl_malloc(rb_gc_get_objspace(), size);
5136+
return rb_gc_impl_malloc(rb_gc_get_objspace(), size, malloc_gc_allowed());
51295137
}
51305138

51315139
void
@@ -5155,7 +5163,7 @@ ruby_xmalloc2(size_t n, size_t size)
51555163
static void *
51565164
ruby_xmalloc2_body(size_t n, size_t size)
51575165
{
5158-
return rb_gc_impl_malloc(rb_gc_get_objspace(), xmalloc2_size(n, size));
5166+
return rb_gc_impl_malloc(rb_gc_get_objspace(), xmalloc2_size(n, size), malloc_gc_allowed());
51595167
}
51605168

51615169
static void *ruby_xcalloc_body(size_t n, size_t size);
@@ -5169,7 +5177,7 @@ ruby_xcalloc(size_t n, size_t size)
51695177
static void *
51705178
ruby_xcalloc_body(size_t n, size_t size)
51715179
{
5172-
return rb_gc_impl_calloc(rb_gc_get_objspace(), xmalloc2_size(n, size));
5180+
return rb_gc_impl_calloc(rb_gc_get_objspace(), xmalloc2_size(n, size), malloc_gc_allowed());
51735181
}
51745182

51755183
static void *ruby_sized_xrealloc_body(void *ptr, size_t new_size, size_t old_size);
@@ -5190,7 +5198,7 @@ ruby_sized_xrealloc_body(void *ptr, size_t new_size, size_t old_size)
51905198
negative_size_allocation_error("too large allocation size");
51915199
}
51925200

5193-
return rb_gc_impl_realloc(rb_gc_get_objspace(), ptr, new_size, old_size);
5201+
return rb_gc_impl_realloc(rb_gc_get_objspace(), ptr, new_size, old_size, malloc_gc_allowed());
51945202
}
51955203

51965204
void *
@@ -5214,7 +5222,7 @@ static void *
52145222
ruby_sized_xrealloc2_body(void *ptr, size_t n, size_t size, size_t old_n)
52155223
{
52165224
size_t len = xmalloc2_size(n, size);
5217-
return rb_gc_impl_realloc(rb_gc_get_objspace(), ptr, len, old_n * size);
5225+
return rb_gc_impl_realloc(rb_gc_get_objspace(), ptr, len, old_n * size, malloc_gc_allowed());
52185226
}
52195227

52205228
void *

gc/default/default.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7970,7 +7970,7 @@ objspace_malloc_gc_stress(rb_objspace_t *objspace)
79707970
}
79717971

79727972
static inline bool
7973-
objspace_malloc_increase_report(rb_objspace_t *objspace, void *mem, size_t new_size, size_t old_size, enum memop_type type)
7973+
objspace_malloc_increase_report(rb_objspace_t *objspace, void *mem, size_t new_size, size_t old_size, enum memop_type type, bool gc_allowed)
79747974
{
79757975
if (0) fprintf(stderr, "increase - ptr: %p, type: %s, new_size: %"PRIdSIZE", old_size: %"PRIdSIZE"\n",
79767976
mem,
@@ -7982,7 +7982,7 @@ objspace_malloc_increase_report(rb_objspace_t *objspace, void *mem, size_t new_s
79827982
}
79837983

79847984
static bool
7985-
objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_size, size_t old_size, enum memop_type type)
7985+
objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_size, size_t old_size, enum memop_type type, bool gc_allowed)
79867986
{
79877987
if (new_size > old_size) {
79887988
RUBY_ATOMIC_SIZE_ADD(malloc_increase, new_size - old_size);
@@ -7997,7 +7997,7 @@ objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_siz
79977997
#endif
79987998
}
79997999

8000-
if (type == MEMOP_TYPE_MALLOC) {
8000+
if (type == MEMOP_TYPE_MALLOC && gc_allowed) {
80018001
retry:
80028002
if (malloc_increase > malloc_limit && ruby_native_thread_p() && !dont_gc_val()) {
80038003
if (ruby_thread_has_gvl_p() && is_lazy_sweeping(objspace)) {
@@ -8079,10 +8079,10 @@ malloc_during_gc_p(rb_objspace_t *objspace)
80798079
}
80808080

80818081
static inline void *
8082-
objspace_malloc_fixup(rb_objspace_t *objspace, void *mem, size_t size)
8082+
objspace_malloc_fixup(rb_objspace_t *objspace, void *mem, size_t size, bool gc_allowed)
80838083
{
80848084
size = objspace_malloc_size(objspace, mem, size);
8085-
objspace_malloc_increase(objspace, mem, size, 0, MEMOP_TYPE_MALLOC) {}
8085+
objspace_malloc_increase(objspace, mem, size, 0, MEMOP_TYPE_MALLOC, gc_allowed) {}
80868086

80878087
#if CALC_EXACT_MALLOC_SIZE
80888088
{
@@ -8114,10 +8114,10 @@ objspace_malloc_fixup(rb_objspace_t *objspace, void *mem, size_t size)
81148114
GPR_FLAG_MALLOC; \
81158115
objspace_malloc_gc_stress(objspace); \
81168116
\
8117-
if (RB_LIKELY((expr))) { \
8117+
if (RB_LIKELY((expr))) { \
81188118
/* Success on 1st try */ \
81198119
} \
8120-
else if (!garbage_collect_with_gvl(objspace, gpr)) { \
8120+
else if (gc_allowed && !garbage_collect_with_gvl(objspace, gpr)) { \
81218121
/* @shyouhei thinks this doesn't happen */ \
81228122
GC_MEMERROR("TRY_WITH_GC: could not GC"); \
81238123
} \
@@ -8160,15 +8160,15 @@ rb_gc_impl_free(void *objspace_ptr, void *ptr, size_t old_size)
81608160
#endif
81618161
old_size = objspace_malloc_size(objspace, ptr, old_size);
81628162

8163-
objspace_malloc_increase(objspace, ptr, 0, old_size, MEMOP_TYPE_FREE) {
8163+
objspace_malloc_increase(objspace, ptr, 0, old_size, MEMOP_TYPE_FREE, true) {
81648164
free(ptr);
81658165
ptr = NULL;
81668166
RB_DEBUG_COUNTER_INC(heap_xfree);
81678167
}
81688168
}
81698169

81708170
void *
8171-
rb_gc_impl_malloc(void *objspace_ptr, size_t size)
8171+
rb_gc_impl_malloc(void *objspace_ptr, size_t size, bool gc_allowed)
81728172
{
81738173
rb_objspace_t *objspace = objspace_ptr;
81748174
check_malloc_not_in_gc(objspace, "malloc");
@@ -8179,11 +8179,11 @@ rb_gc_impl_malloc(void *objspace_ptr, size_t size)
81798179
TRY_WITH_GC(size, mem = malloc(size));
81808180
RB_DEBUG_COUNTER_INC(heap_xmalloc);
81818181
if (!mem) return mem;
8182-
return objspace_malloc_fixup(objspace, mem, size);
8182+
return objspace_malloc_fixup(objspace, mem, size, gc_allowed);
81838183
}
81848184

81858185
void *
8186-
rb_gc_impl_calloc(void *objspace_ptr, size_t size)
8186+
rb_gc_impl_calloc(void *objspace_ptr, size_t size, bool gc_allowed)
81878187
{
81888188
rb_objspace_t *objspace = objspace_ptr;
81898189

@@ -8199,27 +8199,27 @@ rb_gc_impl_calloc(void *objspace_ptr, size_t size)
81998199
size = objspace_malloc_prepare(objspace, size);
82008200
TRY_WITH_GC(size, mem = calloc1(size));
82018201
if (!mem) return mem;
8202-
return objspace_malloc_fixup(objspace, mem, size);
8202+
return objspace_malloc_fixup(objspace, mem, size, gc_allowed);
82038203
}
82048204

82058205
void *
8206-
rb_gc_impl_realloc(void *objspace_ptr, void *ptr, size_t new_size, size_t old_size)
8206+
rb_gc_impl_realloc(void *objspace_ptr, void *ptr, size_t new_size, size_t old_size, bool gc_allowed)
82078207
{
82088208
rb_objspace_t *objspace = objspace_ptr;
82098209

82108210
check_malloc_not_in_gc(objspace, "realloc");
82118211

82128212
void *mem;
82138213

8214-
if (!ptr) return rb_gc_impl_malloc(objspace, new_size);
8214+
if (!ptr) return rb_gc_impl_malloc(objspace, new_size, gc_allowed);
82158215

82168216
/*
82178217
* The behavior of realloc(ptr, 0) is implementation defined.
82188218
* Therefore we don't use realloc(ptr, 0) for portability reason.
82198219
* see http://www.open-std.org/jtc1/sc22/wg14/www/docs/dr_400.htm
82208220
*/
82218221
if (new_size == 0) {
8222-
if ((mem = rb_gc_impl_malloc(objspace, 0)) != NULL) {
8222+
if ((mem = rb_gc_impl_malloc(objspace, 0, gc_allowed)) != NULL) {
82238223
/*
82248224
* - OpenBSD's malloc(3) man page says that when 0 is passed, it
82258225
* returns a non-NULL pointer to an access-protected memory page.
@@ -8278,7 +8278,7 @@ rb_gc_impl_realloc(void *objspace_ptr, void *ptr, size_t new_size, size_t old_si
82788278
}
82798279
#endif
82808280

8281-
objspace_malloc_increase(objspace, mem, new_size, old_size, MEMOP_TYPE_REALLOC);
8281+
objspace_malloc_increase(objspace, mem, new_size, old_size, MEMOP_TYPE_REALLOC, gc_allowed);
82828282

82838283
RB_DEBUG_COUNTER_INC(heap_xrealloc);
82848284
return mem;
@@ -8290,10 +8290,10 @@ rb_gc_impl_adjust_memory_usage(void *objspace_ptr, ssize_t diff)
82908290
rb_objspace_t *objspace = objspace_ptr;
82918291

82928292
if (diff > 0) {
8293-
objspace_malloc_increase(objspace, 0, diff, 0, MEMOP_TYPE_REALLOC);
8293+
objspace_malloc_increase(objspace, 0, diff, 0, MEMOP_TYPE_REALLOC, true);
82948294
}
82958295
else if (diff < 0) {
8296-
objspace_malloc_increase(objspace, 0, 0, -diff, MEMOP_TYPE_REALLOC);
8296+
objspace_malloc_increase(objspace, 0, 0, -diff, MEMOP_TYPE_REALLOC, true);
82978297
}
82988298
}
82998299

gc/gc_impl.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ GC_IMPL_FN bool rb_gc_impl_size_allocatable_p(size_t size);
7272
* memory just return NULL (with appropriate errno set).
7373
* The caller side takes care of that situation.
7474
*/
75-
GC_IMPL_FN void *rb_gc_impl_malloc(void *objspace_ptr, size_t size);
76-
GC_IMPL_FN void *rb_gc_impl_calloc(void *objspace_ptr, size_t size);
77-
GC_IMPL_FN void *rb_gc_impl_realloc(void *objspace_ptr, void *ptr, size_t new_size, size_t old_size);
75+
GC_IMPL_FN void *rb_gc_impl_malloc(void *objspace_ptr, size_t size, bool gc_allowed);
76+
GC_IMPL_FN void *rb_gc_impl_calloc(void *objspace_ptr, size_t size, bool gc_allowed);
77+
GC_IMPL_FN void *rb_gc_impl_realloc(void *objspace_ptr, void *ptr, size_t new_size, size_t old_size, bool gc_allowed);
7878
GC_IMPL_FN void rb_gc_impl_free(void *objspace_ptr, void *ptr, size_t old_size);
7979
GC_IMPL_FN void rb_gc_impl_adjust_memory_usage(void *objspace_ptr, ssize_t diff);
8080
// Marking

ractor.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ ractor_lock(rb_ractor_t *r, const char *file, int line)
7171

7272
ASSERT_ractor_unlocking(r);
7373
rb_native_mutex_lock(&r->sync.lock);
74+
r->malloc_gc_disabled = true;
7475

7576
#if RACTOR_CHECK_MODE > 0
7677
if (rb_current_execution_context(false) != NULL) {
@@ -99,6 +100,10 @@ ractor_unlock(rb_ractor_t *r, const char *file, int line)
99100
#if RACTOR_CHECK_MODE > 0
100101
r->sync.locked_by = Qnil;
101102
#endif
103+
104+
VM_ASSERT(r->malloc_gc_disabled);
105+
106+
r->malloc_gc_disabled = false;
102107
rb_native_mutex_unlock(&r->sync.lock);
103108

104109
RUBY_DEBUG_LOG2(file, line, "r:%u%s", r->pub.id, rb_current_ractor_raw(false) == r ? " (self)" : "");

ractor_core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ struct rb_ractor_struct {
9898
VALUE verbose;
9999
VALUE debug;
100100

101+
bool malloc_gc_disabled;
101102
void *newobj_cache;
102103
}; // rb_ractor_t is defined in vm_core.h
103104

ractor_sync.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,6 @@ ractor_add_port(rb_ractor_t *r, st_data_t id)
395395

396396
RACTOR_LOCK(r);
397397
{
398-
// memo: can cause GC, but GC doesn't use ractor locking.
399398
st_insert(r->sync.ports, id, (st_data_t)rq);
400399
}
401400
RACTOR_UNLOCK(r);

0 commit comments

Comments
 (0)