Skip to content

Commit 0350290

Browse files
committed
Ractor: Fix moving embedded objects
[Bug #20271] [Bug #20267] [Bug #20255] `rb_obj_alloc(RBASIC_CLASS(obj))` will always allocate from the basic 40B pool, so if `obj` is larger than `40B`, we'll create a corrupted object when we later copy the shape_id. Instead we can use the same logic than ractor copy, which is to use `rb_obj_clone`, and later ask the GC to free the original object. We then must turn it into a `T_OBJECT`, because otherwise just changing its class to `RactorMoved` leaves a lot of ways to keep using the object, e.g.: ``` a = [1, 2, 3] Ractor.new{}.send(a, move: true) [].concat(a) # Should raise, but wasn't. ``` If it turns out that `rb_obj_clone` isn't performant enough for some uses, we can always have carefully crafted specialized paths for the types that would benefit from it.
1 parent 532b924 commit 0350290

File tree

9 files changed

+218
-104
lines changed

9 files changed

+218
-104
lines changed

bootstraptest/test_ractor.rb

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,3 +1987,127 @@ def require feature
19871987
GC.start
19881988
:ok.itself
19891989
}
1990+
1991+
# moved objects being corrupted if embeded (String)
1992+
assert_equal 'ok', %q{
1993+
ractor = Ractor.new { Ractor.receive }
1994+
obj = "foobarbazfoobarbazfoobarbazfoobarbaz"
1995+
ractor.send(obj.dup, move: true)
1996+
roundtripped_obj = ractor.take
1997+
roundtripped_obj == obj ? :ok : roundtripped_obj
1998+
}
1999+
2000+
# moved objects being corrupted if embeded (Array)
2001+
assert_equal 'ok', %q{
2002+
ractor = Ractor.new { Ractor.receive }
2003+
obj = Array.new(10, 42)
2004+
ractor.send(obj.dup, move: true)
2005+
roundtripped_obj = ractor.take
2006+
roundtripped_obj == obj ? :ok : roundtripped_obj
2007+
}
2008+
2009+
# moved objects being corrupted if embeded (Hash)
2010+
assert_equal 'ok', %q{
2011+
ractor = Ractor.new { Ractor.receive }
2012+
obj = { foo: 1, bar: 2 }
2013+
ractor.send(obj.dup, move: true)
2014+
roundtripped_obj = ractor.take
2015+
roundtripped_obj == obj ? :ok : roundtripped_obj
2016+
}
2017+
2018+
# moved objects being corrupted if embeded (MatchData)
2019+
assert_equal 'ok', %q{
2020+
ractor = Ractor.new { Ractor.receive }
2021+
obj = "foo".match(/o/)
2022+
ractor.send(obj.dup, move: true)
2023+
roundtripped_obj = ractor.take
2024+
roundtripped_obj == obj ? :ok : roundtripped_obj
2025+
}
2026+
2027+
# moved objects being corrupted if embeded (Struct)
2028+
assert_equal 'ok', %q{
2029+
ractor = Ractor.new { Ractor.receive }
2030+
obj = Struct.new(:a, :b, :c, :d, :e, :f).new(1, 2, 3, 4, 5, 6)
2031+
ractor.send(obj.dup, move: true)
2032+
roundtripped_obj = ractor.take
2033+
roundtripped_obj == obj ? :ok : roundtripped_obj
2034+
}
2035+
2036+
# moved objects being corrupted if embeded (Object)
2037+
assert_equal 'ok', %q{
2038+
ractor = Ractor.new { Ractor.receive }
2039+
class SomeObject
2040+
attr_reader :a, :b, :c, :d, :e, :f
2041+
def initialize
2042+
@a = @b = @c = @d = @e = @f = 1
2043+
end
2044+
2045+
def ==(o)
2046+
@a == o.a &&
2047+
@b == o.b &&
2048+
@c == o.c &&
2049+
@d == o.d &&
2050+
@e == o.e &&
2051+
@f == o.f
2052+
end
2053+
end
2054+
2055+
SomeObject.new # initial non-embeded
2056+
2057+
obj = SomeObject.new
2058+
ractor.send(obj.dup, move: true)
2059+
roundtripped_obj = ractor.take
2060+
roundtripped_obj == obj ? :ok : roundtripped_obj
2061+
}
2062+
2063+
# moved arrays can't be used
2064+
assert_equal 'ok', %q{
2065+
ractor = Ractor.new { Ractor.receive }
2066+
obj = [1]
2067+
ractor.send(obj, move: true)
2068+
begin
2069+
[].concat(obj)
2070+
rescue TypeError
2071+
:ok
2072+
else
2073+
:fail
2074+
end
2075+
}
2076+
2077+
# moved strings can't be used
2078+
assert_equal 'ok', %q{
2079+
ractor = Ractor.new { Ractor.receive }
2080+
obj = "hello"
2081+
ractor.send(obj, move: true)
2082+
begin
2083+
"".replace(obj)
2084+
rescue TypeError
2085+
:ok
2086+
else
2087+
:fail
2088+
end
2089+
}
2090+
2091+
# moved hashes can't be used
2092+
assert_equal 'ok', %q{
2093+
ractor = Ractor.new { Ractor.receive }
2094+
obj = { a: 1 }
2095+
ractor.send(obj, move: true)
2096+
begin
2097+
{}.merge(obj)
2098+
rescue TypeError
2099+
:ok
2100+
else
2101+
:fail
2102+
end
2103+
}
2104+
2105+
# moved objects keep their object_id
2106+
assert_equal 'ok', %q{
2107+
ractor = Ractor.new { Ractor.receive }
2108+
obj = Object.new
2109+
id = obj.object_id
2110+
ractor.send(obj, move: true)
2111+
roundtripped_obj = ractor.take
2112+
roundtripped_obj.object_id == id ? :ok : :fail
2113+
}

common.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13531,6 +13531,7 @@ ractor.$(OBJEXT): $(top_srcdir)/internal/gc.h
1353113531
ractor.$(OBJEXT): $(top_srcdir)/internal/hash.h
1353213532
ractor.$(OBJEXT): $(top_srcdir)/internal/imemo.h
1353313533
ractor.$(OBJEXT): $(top_srcdir)/internal/numeric.h
13534+
ractor.$(OBJEXT): $(top_srcdir)/internal/object.h
1353413535
ractor.$(OBJEXT): $(top_srcdir)/internal/ractor.h
1353513536
ractor.$(OBJEXT): $(top_srcdir)/internal/rational.h
1353613537
ractor.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h

gc.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -665,6 +665,7 @@ typedef struct gc_function_map {
665665
// Object ID
666666
VALUE (*object_id)(void *objspace_ptr, VALUE obj);
667667
VALUE (*object_id_to_ref)(void *objspace_ptr, VALUE object_id);
668+
void (*object_id_move)(void *objspace_ptr, VALUE dest, VALUE src);
668669
// Forking
669670
void (*before_fork)(void *objspace_ptr);
670671
void (*after_fork)(void *objspace_ptr, rb_pid_t pid);
@@ -842,6 +843,7 @@ ruby_modular_gc_init(void)
842843
// Object ID
843844
load_modular_gc_func(object_id);
844845
load_modular_gc_func(object_id_to_ref);
846+
load_modular_gc_func(object_id_move);
845847
// Forking
846848
load_modular_gc_func(before_fork);
847849
load_modular_gc_func(after_fork);
@@ -925,6 +927,7 @@ ruby_modular_gc_init(void)
925927
// Object ID
926928
# define rb_gc_impl_object_id rb_gc_functions.object_id
927929
# define rb_gc_impl_object_id_to_ref rb_gc_functions.object_id_to_ref
930+
# define rb_gc_impl_object_id_move rb_gc_functions.object_id_move
928931
// Forking
929932
# define rb_gc_impl_before_fork rb_gc_functions.before_fork
930933
# define rb_gc_impl_after_fork rb_gc_functions.after_fork
@@ -966,7 +969,6 @@ rb_objspace_alloc(void)
966969

967970
void *objspace = rb_gc_impl_objspace_alloc();
968971
ruby_current_vm_ptr->gc.objspace = objspace;
969-
970972
rb_gc_impl_objspace_init(objspace);
971973
rb_gc_impl_stress_set(objspace, initial_stress);
972974

@@ -2659,6 +2661,19 @@ rb_gc_mark_roots(void *objspace, const char **categoryp)
26592661

26602662
#define TYPED_DATA_REFS_OFFSET_LIST(d) (size_t *)(uintptr_t)RTYPEDDATA(d)->type->function.dmark
26612663

2664+
void
2665+
rb_gc_ractor_moved(VALUE dest, VALUE src)
2666+
{
2667+
void *objspace = rb_gc_get_objspace();
2668+
if (UNLIKELY(FL_TEST_RAW(src, FL_SEEN_OBJ_ID))) {
2669+
rb_gc_impl_object_id_move(objspace, dest, src);
2670+
}
2671+
2672+
rb_gc_obj_free(objspace, src);
2673+
MEMZERO((void *)src, char, rb_gc_obj_slot_size(src));
2674+
RBASIC(src)->flags = T_OBJECT | FL_FREEZE; // Avoid mutations using bind_call, etc.
2675+
}
2676+
26622677
void
26632678
rb_gc_mark_children(void *objspace, VALUE obj)
26642679
{

gc/default/default.c

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,25 +1564,6 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr)
15641564
!RVALUE_MARKED(objspace, ptr);
15651565
}
15661566

1567-
VALUE
1568-
rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id)
1569-
{
1570-
rb_objspace_t *objspace = objspace_ptr;
1571-
1572-
VALUE obj;
1573-
if (st_lookup(objspace->id_to_obj_tbl, object_id, &obj) &&
1574-
!rb_gc_impl_garbage_object_p(objspace, obj)) {
1575-
return obj;
1576-
}
1577-
1578-
if (rb_funcall(object_id, rb_intern(">="), 1, ULL2NUM(objspace->next_object_id))) {
1579-
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is not id value", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
1580-
}
1581-
else {
1582-
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is recycled object", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
1583-
}
1584-
}
1585-
15861567
VALUE
15871568
rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
15881569
{
@@ -1614,6 +1595,46 @@ rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
16141595
return id;
16151596
}
16161597

1598+
VALUE
1599+
rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id)
1600+
{
1601+
rb_objspace_t *objspace = objspace_ptr;
1602+
1603+
VALUE obj;
1604+
if (st_lookup(objspace->id_to_obj_tbl, object_id, &obj) &&
1605+
!rb_gc_impl_garbage_object_p(objspace, obj)) {
1606+
return obj;
1607+
}
1608+
1609+
if (rb_funcall(object_id, rb_intern(">="), 1, ULL2NUM(objspace->next_object_id))) {
1610+
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is not id value", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
1611+
}
1612+
else {
1613+
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is recycled object", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
1614+
}
1615+
}
1616+
1617+
void
1618+
rb_gc_impl_object_id_move(void *objspace_ptr, VALUE dest, VALUE src)
1619+
{
1620+
/* If the source object's object_id has been seen, we need to update
1621+
* the object to object id mapping. */
1622+
st_data_t id = 0;
1623+
rb_objspace_t *objspace = objspace_ptr;
1624+
1625+
unsigned int lev = rb_gc_vm_lock();
1626+
st_data_t key = (st_data_t)src;
1627+
if (!st_delete(objspace->obj_to_id_tbl, &key, &id)) {
1628+
rb_bug("gc_move: object ID seen, but not in mapping table: %s", rb_obj_info(src));
1629+
}
1630+
FL_UNSET_RAW(src, FL_SEEN_OBJ_ID);
1631+
1632+
st_insert(objspace->obj_to_id_tbl, (st_data_t)dest, id);
1633+
st_insert(objspace->id_to_obj_tbl, id, (st_data_t)dest);
1634+
FL_SET_RAW(dest, FL_SEEN_OBJ_ID);
1635+
rb_gc_vm_unlock(lev);
1636+
}
1637+
16171638
static void free_stack_chunks(mark_stack_t *);
16181639
static void mark_stack_free_cache(mark_stack_t *);
16191640
static void heap_page_free(rb_objspace_t *objspace, struct heap_page *page);

gc/gc_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ GC_IMPL_FN void rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr);
103103
// Object ID
104104
GC_IMPL_FN VALUE rb_gc_impl_object_id(void *objspace_ptr, VALUE obj);
105105
GC_IMPL_FN VALUE rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id);
106+
GC_IMPL_FN void rb_gc_impl_object_id_move(void *objspace_ptr, VALUE dest, VALUE src);
106107
// Forking
107108
GC_IMPL_FN void rb_gc_impl_before_fork(void *objspace_ptr);
108109
GC_IMPL_FN void rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid);

gc/mmtk/mmtk.c

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,16 +1108,21 @@ objspace_obj_id_init(struct objspace *objspace)
11081108
VALUE
11091109
rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
11101110
{
1111+
VALUE id;
11111112
struct objspace *objspace = objspace_ptr;
11121113

11131114
unsigned int lev = rb_gc_vm_lock();
1114-
1115-
VALUE id;
1116-
if (st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, &id)) {
1117-
RUBY_ASSERT(FL_TEST(obj, FL_SEEN_OBJ_ID));
1115+
if (FL_TEST(obj, FL_SEEN_OBJ_ID)) {
1116+
st_data_t val;
1117+
if (st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, &val)) {
1118+
id = (VALUE)val;
1119+
}
1120+
else {
1121+
rb_bug("rb_gc_impl_object_id: FL_SEEN_OBJ_ID flag set but not found in table");
1122+
}
11181123
}
11191124
else {
1120-
RUBY_ASSERT(!FL_TEST(obj, FL_SEEN_OBJ_ID));
1125+
RUBY_ASSERT(!st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, NULL));
11211126

11221127
id = ULL2NUM(objspace->next_object_id);
11231128
objspace->next_object_id += OBJ_ID_INCREMENT;
@@ -1126,7 +1131,6 @@ rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
11261131
st_insert(objspace->id_to_obj_tbl, (st_data_t)id, (st_data_t)obj);
11271132
FL_SET(obj, FL_SEEN_OBJ_ID);
11281133
}
1129-
11301134
rb_gc_vm_unlock(lev);
11311135

11321136
return id;
@@ -1151,6 +1155,27 @@ rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id)
11511155
}
11521156
}
11531157

1158+
void
1159+
rb_gc_impl_object_id_move(void *objspace_ptr, VALUE dest, VALUE src)
1160+
{
1161+
/* If the source object's object_id has been seen, we need to update
1162+
* the object to object id mapping. */
1163+
st_data_t id = 0;
1164+
struct objspace *objspace = objspace_ptr;
1165+
1166+
unsigned int lev = rb_gc_vm_lock();
1167+
st_data_t key = (st_data_t)src;
1168+
if (!st_delete(objspace->obj_to_id_tbl, &key, &id)) {
1169+
rb_bug("gc_move: object ID seen, but not in mapping table: %s", rb_obj_info(src));
1170+
}
1171+
FL_UNSET_RAW(src, FL_SEEN_OBJ_ID);
1172+
1173+
st_insert(objspace->obj_to_id_tbl, (st_data_t)dest, id);
1174+
st_insert(objspace->id_to_obj_tbl, id, (st_data_t)dest);
1175+
FL_SET_RAW(dest, FL_SEEN_OBJ_ID);
1176+
rb_gc_vm_unlock(lev);
1177+
}
1178+
11541179
// Forking
11551180

11561181
void

internal/gc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ struct rb_gc_object_metadata_entry {
183183
/* gc.c */
184184
RUBY_ATTR_MALLOC void *ruby_mimmalloc(size_t size);
185185
RUBY_ATTR_MALLOC void *ruby_mimcalloc(size_t num, size_t size);
186+
void rb_gc_ractor_moved(VALUE dest, VALUE src);
186187
void ruby_mimfree(void *ptr);
187188
void rb_gc_prepare_heap(void);
188189
void rb_objspace_set_event_hook(const rb_event_flag_t event);

0 commit comments

Comments
 (0)