Skip to content

Commit e328137

Browse files
vtjnash authored and Drvi committed
staticdata: handle cycles in datatypes (JuliaLang#52752)
Handle any sort of cycle encountered in the datatype super fields by always deferring that field until later and setting up a deferred mechanism for updating the field only after the supertype is ready. Fixes JuliaLang#52660. (Cherry picked from commit c94b1a3.)
1 parent 19dd691 commit e328137

File tree

3 files changed

+95
-71
lines changed

3 files changed

+95
-71
lines changed

src/staticdata.c

Lines changed: 84 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,6 @@ static arraylist_t deser_sym;
295295
static htable_t external_objects;
296296

297297
static htable_t serialization_order; // to break cycles, mark all objects that are serialized
298-
static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them.
299298
static htable_t nullptrs;
300299
// FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization
301300
// must be "toplevel" in this queue. For types, parameters and field types must appear
@@ -463,6 +462,7 @@ typedef struct {
463462
arraylist_t relocs_list; // a list of (location, target) pairs, see description at top
464463
arraylist_t gctags_list; // "
465464
arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated
465+
arraylist_t uniquing_super; // a list of datatypes, used in super fields, that need to be marked in uniquing_types once they are reached, for handling unique-ing of them on deserialization
466466
arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated
467467
arraylist_t fixup_types; // a list of locations of types requiring (re)caching
468468
arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching
@@ -726,14 +726,13 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
726726
{
727727
jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
728728
jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate);
729+
const jl_datatype_layout_t *layout = t->layout;
729730

730731
if (!recursive)
731732
goto done_fields;
732733

733734
if (s->incremental && jl_is_datatype(v) && immediate) {
734735
jl_datatype_t *dt = (jl_datatype_t*)v;
735-
// ensure super is queued (though possibly not yet handled, since it may have cycles)
736-
jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1);
737736
// ensure all type parameters are recached
738737
jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1);
739738
jl_value_t *singleton = dt->instance;
@@ -743,7 +742,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
743742
// (it may get serialized from elsewhere though)
744743
record_field_change(&dt->instance, jl_nothing);
745744
}
746-
immediate = 0; // do not handle remaining fields immediately (just field types remains)
745+
goto done_fields; // for now
747746
}
748747
if (s->incremental && jl_is_method_instance(v)) {
749748
jl_method_instance_t *mi = (jl_method_instance_t*)v;
@@ -800,11 +799,9 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
800799
}
801800
}
802801

803-
804802
if (immediate) // must be things that can be recursively handled, and valid as type parameters
805803
assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v));
806804

807-
const jl_datatype_layout_t *layout = t->layout;
808805
if (layout->npointers == 0) {
809806
// bitstypes do not require recursion
810807
}
@@ -860,22 +857,35 @@ done_fields: ;
860857

861858
// We've encountered an item we need to cache
862859
void **bp = ptrhash_bp(&serialization_order, v);
863-
assert(*bp != (void*)(uintptr_t)-1);
864-
if (s->incremental) {
865-
void **bp2 = ptrhash_bp(&unique_ready, v);
866-
if (*bp2 == HT_NOTFOUND)
867-
assert(*bp == (void*)(uintptr_t)-2);
868-
else if (*bp != (void*)(uintptr_t)-2)
869-
return;
870-
}
871-
else {
872-
assert(*bp == (void*)(uintptr_t)-2);
873-
}
860+
assert(*bp == (void*)(uintptr_t)-2);
874861
arraylist_push(&serialization_queue, (void*) v);
875862
size_t idx = serialization_queue.len - 1;
876863
assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");
877-
878864
*bp = (void*)((char*)HT_NOTFOUND + 1 + idx);
865+
866+
// DataType is very unusual, in that some of the fields need to be pre-order, and some
867+
// (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise
868+
// try to promote itself to be immediate)
869+
if (s->incremental && jl_is_datatype(v) && immediate && recursive) {
870+
jl_datatype_t *dt = (jl_datatype_t*)v;
871+
void **bp = ptrhash_bp(&serialization_order, (void*)dt->super);
872+
if (*bp != (void*)-2) {
873+
// if super is already on the stack of things to handle when this returns, do
874+
// not try to handle it now
875+
jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, immediate);
876+
}
877+
immediate = 0;
878+
char *data = (char*)jl_data_ptr(v);
879+
size_t i, np = layout->npointers;
880+
for (i = 0; i < np; i++) {
881+
uint32_t ptr = jl_ptr_offset(t, i);
882+
if (ptr * sizeof(jl_value_t*) == offsetof(jl_datatype_t, super))
883+
continue; // skip the super field, since it might not be quite validly ordered
884+
int mutabl = 1;
885+
jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl);
886+
jl_queue_for_serialization_(s, fld, 1, immediate);
887+
}
888+
}
879889
}
880890

881891
static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
@@ -894,28 +904,19 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i
894904
}
895905

896906
void **bp = ptrhash_bp(&serialization_order, v);
897-
if (*bp == HT_NOTFOUND) {
898-
*bp = (void*)(uintptr_t)(immediate ? -2 : -1);
899-
}
900-
else {
901-
if (!s->incremental || !immediate || !recursive)
902-
return;
903-
void **bp2 = ptrhash_bp(&unique_ready, v);
904-
if (*bp2 == HT_NOTFOUND)
905-
*bp2 = v; // now is unique_ready
906-
else {
907-
assert(*bp != (void*)(uintptr_t)-1);
908-
return; // already was unique_ready
909-
}
910-
assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then
911-
if (*bp == (void*)(uintptr_t)-1)
912-
*bp = (void*)(uintptr_t)-2; // now immediate
913-
}
907+
assert(!immediate || *bp != (void*)(uintptr_t)-2);
908+
if (*bp == HT_NOTFOUND)
909+
*bp = (void*)(uintptr_t)-1; // now enqueued
910+
else if (!s->incremental || !immediate || !recursive || *bp != (void*)(uintptr_t)-1)
911+
return;
914912

915-
if (immediate)
913+
if (immediate) {
914+
*bp = (void*)(uintptr_t)-2; // now immediate
916915
jl_insert_into_serialization_queue(s, v, recursive, immediate);
917-
else
916+
}
917+
else {
918918
arraylist_push(&object_worklist, (void*)v);
919+
}
919920
}
920921

921922
// Do a pre-order traversal of the to-serialize worklist, in the identical order
@@ -1065,8 +1066,10 @@ static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t o
10651066
if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) {
10661067
if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld)))
10671068
arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset);
1068-
else
1069+
else if (jl_is_method_instance(fld))
10691070
arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset);
1071+
else
1072+
assert(0 && "unknown object type with needs_uniquing set");
10701073
}
10711074
}
10721075

@@ -1224,7 +1227,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
12241227
write_pointerfield(s, (jl_value_t*)mi->sparam_vals);
12251228
continue;
12261229
}
1227-
else if (!jl_is_datatype(v)) {
1230+
else if (jl_is_datatype(v)) {
1231+
for (size_t i = 0; i < s->uniquing_super.len; i++) {
1232+
if (s->uniquing_super.items[i] == (void*)v) {
1233+
s->uniquing_super.items[i] = arraylist_pop(&s->uniquing_super);
1234+
arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(reloc_offset|3));
1235+
}
1236+
}
1237+
}
1238+
else {
12281239
assert(jl_is_datatype_singleton(t) && "unreachable");
12291240
}
12301241
}
@@ -1589,6 +1600,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
15891600
ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t));
15901601
}
15911602
}
1603+
void *superidx = ptrhash_get(&serialization_order, dt->super);
1604+
if (s->incremental && superidx != HT_NOTFOUND && (char*)superidx - 1 - (char*)HT_NOTFOUND > item && needs_uniquing((jl_value_t*)dt->super))
1605+
arraylist_push(&s->uniquing_super, dt->super);
15921606
}
15931607
else if (jl_is_typename(v)) {
15941608
assert(f == s->s);
@@ -1633,6 +1647,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
16331647
}
16341648
}
16351649
}
1650+
assert(s->uniquing_super.len == 0);
16361651
}
16371652

16381653
// In deserialization, create Symbols and set up the
@@ -2397,7 +2412,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
23972412
ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2));
23982413
}
23992414
htable_new(&serialization_order, 25000);
2400-
htable_new(&unique_ready, 0);
24012415
htable_new(&nullptrs, 0);
24022416
arraylist_new(&object_worklist, 0);
24032417
arraylist_new(&serialization_queue, 0);
@@ -2420,6 +2434,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
24202434
arraylist_new(&s.relocs_list, 0);
24212435
arraylist_new(&s.gctags_list, 0);
24222436
arraylist_new(&s.uniquing_types, 0);
2437+
arraylist_new(&s.uniquing_super, 0);
24232438
arraylist_new(&s.uniquing_objs, 0);
24242439
arraylist_new(&s.fixup_types, 0);
24252440
arraylist_new(&s.fixup_objs, 0);
@@ -2652,6 +2667,11 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
26522667
arraylist_free(&object_worklist);
26532668
arraylist_free(&serialization_queue);
26542669
arraylist_free(&layout_table);
2670+
arraylist_free(&s.uniquing_types);
2671+
arraylist_free(&s.uniquing_super);
2672+
arraylist_free(&s.uniquing_objs);
2673+
arraylist_free(&s.fixup_types);
2674+
arraylist_free(&s.fixup_objs);
26552675
arraylist_free(&s.ccallable_list);
26562676
arraylist_free(&s.relocs_list);
26572677
arraylist_free(&s.gctags_list);
@@ -2661,7 +2681,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
26612681
if (worklist)
26622682
htable_free(&external_objects);
26632683
htable_free(&serialization_order);
2664-
htable_free(&unique_ready);
26652684
htable_free(&nullptrs);
26662685
htable_free(&symbol_table);
26672686
htable_free(&fptr_to_id);
@@ -3026,31 +3045,43 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
30263045
uintptr_t item = (uintptr_t)s.uniquing_types.items[i];
30273046
// check whether we are operating on the typetag
30283047
// (needing to ignore GC bits) or a regular field
3029-
int tag = (item & 1) == 1;
3030-
// check whether this is a gvar index
3031-
int gvar = (item & 2) == 2;
3048+
// and check whether this is a gvar index
3049+
int tag = (item & 3);
30323050
item &= ~(uintptr_t)3;
30333051
uintptr_t *pfld;
30343052
jl_value_t **obj, *newobj;
3035-
if (gvar) {
3053+
if (tag == 3) {
3054+
obj = (jl_value_t**)(image_base + item);
3055+
pfld = NULL;
3056+
for (size_t i = 0; i < delay_list.len; i += 2) {
3057+
if (obj == (jl_value_t **)delay_list.items[i + 0]) {
3058+
pfld = (uintptr_t*)delay_list.items[i + 1];
3059+
delay_list.items[i + 1] = arraylist_pop(&delay_list);
3060+
delay_list.items[i + 0] = arraylist_pop(&delay_list);
3061+
break;
3062+
}
3063+
}
3064+
assert(pfld);
3065+
}
3066+
else if (tag == 2) {
30363067
if (image->gvars_base == NULL)
30373068
continue;
30383069
item >>= 2;
30393070
assert(item < s.gvar_record->size / sizeof(reloc_t));
30403071
pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item);
30413072
obj = *(jl_value_t***)pfld;
3042-
assert(tag == 0);
30433073
}
30443074
else {
30453075
pfld = (uintptr_t*)(image_base + item);
3046-
if (tag)
3076+
if (tag == 1)
30473077
obj = (jl_value_t**)jl_typeof(jl_valueof(pfld));
30483078
else
30493079
obj = *(jl_value_t***)pfld;
30503080
if ((char*)obj > (char*)pfld) {
3081+
// this must be the super field
30513082
assert(tag == 0);
3052-
arraylist_push(&delay_list, pfld);
30533083
arraylist_push(&delay_list, obj);
3084+
arraylist_push(&delay_list, pfld);
30543085
ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid
30553086
*pfld = (uintptr_t)NULL;
30563087
continue;
@@ -3100,25 +3131,14 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
31003131
assert(newobj != jl_nothing);
31013132
arraylist_push(&cleanup_list, (void*)obj);
31023133
}
3103-
if (tag)
3134+
if (tag == 1)
31043135
*pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE;
31053136
else
31063137
*pfld = (uintptr_t)newobj;
31073138
assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
31083139
assert(jl_typetagis(obj, otyp));
31093140
}
3110-
// A few fields (reached via super) might be self-recursive. This is rare, but handle them now.
3111-
// They cannot be instances though, since the type must fully exist before the singleton field can be allocated
3112-
for (size_t i = 0; i < delay_list.len; ) {
3113-
uintptr_t *pfld = (uintptr_t*)delay_list.items[i++];
3114-
jl_value_t **obj = (jl_value_t **)delay_list.items[i++];
3115-
assert(jl_is_datatype(obj));
3116-
jl_datatype_t *dt = (jl_datatype_t*)obj[0];
3117-
assert(jl_is_datatype(dt));
3118-
jl_value_t *newobj = (jl_value_t*)dt;
3119-
*pfld = (uintptr_t)newobj;
3120-
assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
3121-
}
3141+
assert(delay_list.len == 0);
31223142
arraylist_free(&delay_list);
31233143
// now that all the fields of dt are assigned and unique, copy them into
31243144
// their final newdt memory location: this ensures we do not accidentally
@@ -3166,11 +3186,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
31663186
for (size_t i = 0; i < s.uniquing_objs.len; i++) {
31673187
uintptr_t item = (uintptr_t)s.uniquing_objs.items[i];
31683188
// check whether this is a gvar index
3169-
int gvar = (item & 2) == 2;
3189+
int tag = (item & 3);
3190+
assert(tag == 0 || tag == 2);
31703191
item &= ~(uintptr_t)3;
31713192
uintptr_t *pfld;
31723193
jl_value_t **obj, *newobj;
3173-
if (gvar) {
3194+
if (tag == 2) {
31743195
if (image->gvars_base == NULL)
31753196
continue;
31763197
item >>= 2;

src/staticdata_utils.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,15 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo
4545
jl_datatype_t *dt = (jl_datatype_t*)t;
4646
assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?");
4747
jl_datatype_t *super = dt->super;
48-
// check if super is news, since then we must be new also
49-
// (it is also possible that super is indeterminate now, wait for `t`
50-
// to be resolved, then will be determined later and fixed up by the
51-
// delay_list, for this and any other references to it).
52-
while (super != jl_any_type) {
53-
assert(super);
48+
// fast-path: check if super is in news, since then we must be new also
49+
// (it is also possible that super is indeterminate or NULL right now,
50+
// waiting for `t` to be resolved, then will be determined later as
51+
// soon as possible afterwards).
52+
while (super != NULL && super != jl_any_type) {
5453
if (ptrhash_has(news, (void*)super))
5554
return 1;
5655
if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg))
57-
break; // fast-path for rejection of super
56+
break; // the rest must all be non-new
5857
// otherwise super might be something that was not cached even though a later supertype might be
5958
// for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}`
6059
super = super->super;

test/precompile.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ precompile_test_harness(false) do dir
115115
d = den(a)
116116
return h
117117
end
118+
abstract type AbstractAlgebraMap{A} end
119+
struct GAPGroupHomomorphism{A, B} <: AbstractAlgebraMap{GAPGroupHomomorphism{B, A}} end
118120
end
119121
""")
120122
write(Foo2_file,
@@ -130,7 +132,7 @@ precompile_test_harness(false) do dir
130132
write(Foo_file,
131133
"""
132134
module $Foo_module
133-
import $FooBase_module, $FooBase_module.typeA
135+
import $FooBase_module, $FooBase_module.typeA, $FooBase_module.GAPGroupHomomorphism
134136
import $Foo2_module: $Foo2_module, override, overridenc
135137
import $FooBase_module.hash
136138
import Test
@@ -211,6 +213,8 @@ precompile_test_harness(false) do dir
211213
Base.convert(::Type{Some{Value18343}}, ::Value18343{Some}) = 2
212214
Base.convert(::Type{Ref}, ::Value18343{T}) where {T} = 3
213215
216+
const GAPType1 = GAPGroupHomomorphism{Nothing, Nothing}
217+
const GAPType2 = GAPGroupHomomorphism{1, 2}
214218
215219
# issue #28297
216220
mutable struct Result

0 commit comments

Comments
 (0)