Skip to content

Commit 860188f

Browse files
authored
remove some more serialized junk from the sysimg (#58078)
Together, these commits shrink the system image from roughly 184 MB to 159 MB by not storing unnecessary inferred IR:

```
$ llvm-size -A usr/lib/julia/sys.dylib
usr/lib/julia/sys.dylib  :
section                size        addr
__text             15522036        1904
__stubs                1128    15523940
__stub_helper          1152    15525068
__const              438592    15526224
__cstring            297006    15964816
__unwind_info        247368    16261824
__eh_frame            27144    16509192
__got                   104    16547840
__const                 632    16547944
__la_symbol_ptr         752    16564224
__data            142444216    16564992
__common             100800   159009216
Total             159080930
```
2 parents 1faa698 + fb5fffd commit 860188f

File tree

5 files changed

+103
-78
lines changed

5 files changed

+103
-78
lines changed

Compiler/src/typeinfer.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ function transform_result_for_cache(interp::AbstractInterpreter, result::Inferen
399399
if isa(src, OptimizationState)
400400
opt = src
401401
inlining_cost = compute_inlining_cost(interp, result, opt.optresult)
402-
discard_optimized_result(interp, opt, inlining_cost) && return nothing
402+
discard_optimized_result(interp, opt, inlining_cost, result.ipo_effects) && return nothing
403403
src = ir_to_codeinf!(opt)
404404
end
405405
if isa(src, CodeInfo)
@@ -409,7 +409,7 @@ function transform_result_for_cache(interp::AbstractInterpreter, result::Inferen
409409
return src
410410
end
411411

412-
function discard_optimized_result(interp::AbstractInterpreter, opt#=::OptimizationState=#, inlining_cost#=::InlineCostType=#)
412+
# Report whether an optimized inference result should be dropped rather than
# kept for the cache. The answer is `true` only when the interpreter allows
# discarding trees (`may_discard_trees(interp)`) and the computed inlining cost
# equals `MAX_INLINE_COST`; in every other case it is `false`.
#
# This method does not read `opt` or `effects`.
# NOTE(review): presumably they stay in the signature so that methods for other
# interpreters can take them into account -- confirm.
function discard_optimized_result(interp::AbstractInterpreter, opt#=::OptimizationState=#, inlining_cost#=::InlineCostType=#, effects::Effects)
    if may_discard_trees(interp)
        return inlining_cost == MAX_INLINE_COST
    end
    return false
end

src/aotcompile.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -675,20 +675,6 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
675675
fargs[0] = (jl_value_t*)codeinfos;
676676
void *data = jl_emit_native(codeinfos, llvmmod, &cgparams, external_linkage);
677677

678-
// examine everything just emitted and save it to the caches
679-
if (!external_linkage) {
680-
for (size_t i = 0, l = jl_array_nrows(codeinfos); i < l; i++) {
681-
jl_value_t *item = jl_array_ptr_ref(codeinfos, i);
682-
if (jl_is_code_instance(item)) {
683-
// now add it to our compilation results
684-
jl_code_instance_t *codeinst = (jl_code_instance_t*)item;
685-
jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i);
686-
assert(jl_is_code_info(src));
687-
jl_add_codeinst_to_cache(codeinst, src);
688-
}
689-
}
690-
}
691-
692678
// move everything inside, now that we've merged everything
693679
// (before adding the exported headers)
694680
((jl_native_code_desc_t*)data)->M.withModuleDo([&](Module &M) {

src/gf.c

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,30 +2837,10 @@ void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_c
28372837

28382838
jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT);
28392839

2840-
JL_DLLEXPORT void jl_add_codeinst_to_cache(jl_code_instance_t *codeinst, jl_code_info_t *src)
2841-
{
2842-
assert(jl_is_code_info(src));
2843-
jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
2844-
if (jl_generating_output() && jl_is_method(mi->def.method) && jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
2845-
jl_value_t *compressed = jl_compress_ir(mi->def.method, src);
2846-
// These should already be compatible (and should be an assert), but make sure of it anyways
2847-
if (jl_is_svec(src->edges)) {
2848-
jl_atomic_store_release(&codeinst->edges, (jl_svec_t*)src->edges);
2849-
jl_gc_wb(codeinst, src->edges);
2850-
}
2851-
jl_atomic_store_release(&codeinst->debuginfo, src->debuginfo);
2852-
jl_gc_wb(codeinst, src->debuginfo);
2853-
jl_atomic_store_release(&codeinst->inferred, compressed);
2854-
jl_gc_wb(codeinst, compressed);
2855-
}
2856-
}
2857-
2858-
28592840
// Declared JL_DLLEXPORT. Asserts that `src` is a code info object, then
// forwards `codeinst` and `src` to jl_emit_codeinst_to_jit.
// In this diff view the line tagged `2863-` is deleted by the commit, so after
// the change this function no longer calls jl_add_codeinst_to_cache.
JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src)
28602841
{
28612842
assert(jl_is_code_info(src));
28622843
jl_emit_codeinst_to_jit(codeinst, src);
2863-
jl_add_codeinst_to_cache(codeinst, src);
28642844
}
28652845

28662846
jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world)

src/julia_internal.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,6 @@ JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROP
685685
JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile);
686686
JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache);
687687
JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src);
688-
JL_DLLEXPORT void jl_add_codeinst_to_cache(jl_code_instance_t *codeinst, jl_code_info_t *src);
689688

690689
JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner);
691690
JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(

src/staticdata.c

Lines changed: 101 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -863,40 +863,60 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
863863
}
864864
goto done_fields; // for now
865865
}
866-
if (s->incremental && jl_is_method_instance(v)) {
866+
if (jl_is_method_instance(v)) {
867867
jl_method_instance_t *mi = (jl_method_instance_t*)v;
868-
jl_value_t *def = mi->def.value;
869-
if (needs_uniquing(v, s->query_cache)) {
870-
// we only need 3 specific fields of this (the rest are not used)
871-
jl_queue_for_serialization(s, mi->def.value);
872-
jl_queue_for_serialization(s, mi->specTypes);
873-
jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals);
874-
goto done_fields;
875-
}
876-
else if (jl_is_method(def) && jl_object_in_image(def)) {
877-
// we only need 3 specific fields of this (the rest are restored afterward, if valid)
878-
// in particular, cache is repopulated by jl_mi_cache_insert for all foreign function,
879-
// so must not be present here
880-
record_field_change((jl_value_t**)&mi->backedges, NULL);
881-
record_field_change((jl_value_t**)&mi->cache, NULL);
868+
if (s->incremental) {
869+
jl_value_t *def = mi->def.value;
870+
if (needs_uniquing(v, s->query_cache)) {
871+
// we only need 3 specific fields of this (the rest are not used)
872+
jl_queue_for_serialization(s, mi->def.value);
873+
jl_queue_for_serialization(s, mi->specTypes);
874+
jl_queue_for_serialization(s, (jl_value_t*)mi->sparam_vals);
875+
goto done_fields;
876+
}
877+
else if (jl_is_method(def) && jl_object_in_image(def)) {
878+
// we only need 3 specific fields of this (the rest are restored afterward, if valid)
879+
// in particular, cache is repopulated by jl_mi_cache_insert for all foreign function,
880+
// so must not be present here
881+
record_field_change((jl_value_t**)&mi->backedges, NULL);
882+
record_field_change((jl_value_t**)&mi->cache, NULL);
883+
}
884+
else {
885+
assert(!needs_recaching(v, s->query_cache));
886+
}
887+
// n.b. opaque closures cannot be inspected and relied upon like a
888+
// normal method since they can get improperly introduced by generated
889+
// functions, so if they appeared at all, we will probably serialize
890+
// them wrong and segfault. The jl_code_for_staged function should
891+
// prevent this from happening, so we do not need to detect that user
892+
// error now.
882893
}
883-
else {
884-
assert(!needs_recaching(v, s->query_cache));
894+
// don't recurse into all backedges memory (yet)
895+
jl_value_t *backedges = get_replaceable_field((jl_value_t**)&mi->backedges, 1);
896+
if (backedges) {
897+
jl_queue_for_serialization_(s, (jl_value_t*)((jl_array_t*)backedges)->ref.mem, 0, 1);
898+
size_t i = 0, n = jl_array_nrows(backedges);
899+
while (i < n) {
900+
jl_value_t *invokeTypes;
901+
jl_code_instance_t *caller;
902+
i = get_next_edge((jl_array_t*)backedges, i, &invokeTypes, &caller);
903+
if (invokeTypes)
904+
jl_queue_for_serialization(s, invokeTypes);
905+
}
885906
}
886-
// n.b. opaque closures cannot be inspected and relied upon like a
887-
// normal method since they can get improperly introduced by generated
888-
// functions, so if they appeared at all, we will probably serialize
889-
// them wrong and segfault. The jl_code_for_staged function should
890-
// prevent this from happening, so we do not need to detect that user
891-
// error now.
892-
}
893-
if (s->incremental && jl_is_binding(v)) {
894-
if (needs_uniquing(v, s->query_cache)) {
895-
jl_binding_t *b = (jl_binding_t*)v;
907+
}
908+
if (jl_is_binding(v)) {
909+
jl_binding_t *b = (jl_binding_t*)v;
910+
if (s->incremental && needs_uniquing(v, s->query_cache)) {
896911
jl_queue_for_serialization(s, b->globalref->mod);
897912
jl_queue_for_serialization(s, b->globalref->name);
898913
goto done_fields;
899914
}
915+
// don't recurse into backedges memory (yet)
916+
jl_value_t *backedges = get_replaceable_field((jl_value_t**)&b->backedges, 1);
917+
if (backedges) {
918+
jl_queue_for_serialization_(s, (jl_value_t*)((jl_array_t*)backedges)->ref.mem, 0, 1);
919+
}
900920
}
901921
if (s->incremental && jl_is_globalref(v)) {
902922
jl_globalref_t *gr = (jl_globalref_t*)v;
@@ -914,18 +934,20 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
914934
assert(!jl_object_in_image((jl_value_t*)tn->wrapper));
915935
}
916936
}
917-
if (s->incremental && jl_is_code_instance(v)) {
937+
if (jl_is_code_instance(v)) {
918938
jl_code_instance_t *ci = (jl_code_instance_t*)v;
919939
jl_method_instance_t *mi = jl_get_ci_mi(ci);
920-
// make sure we don't serialize other reachable cache entries of foreign methods
921-
// Should this now be:
922-
// if (ci !in ci->defs->cache)
923-
// record_field_change((jl_value_t**)&ci->next, NULL);
924-
// Why are we checking that the method/module this originates from is in_image?
925-
// and then disconnect this CI?
926-
if (jl_object_in_image((jl_value_t*)mi->def.value)) {
927-
// TODO: if (ci in ci->defs->cache)
928-
record_field_change((jl_value_t**)&ci->next, NULL);
940+
if (s->incremental) {
941+
// make sure we don't serialize other reachable cache entries of foreign methods
942+
// Should this now be:
943+
// if (ci !in ci->defs->cache)
944+
// record_field_change((jl_value_t**)&ci->next, NULL);
945+
// Why are we checking that the method/module this originates from is in_image?
946+
// and then disconnect this CI?
947+
if (jl_object_in_image((jl_value_t*)mi->def.value)) {
948+
// TODO: if (ci in ci->defs->cache)
949+
record_field_change((jl_value_t**)&ci->next, NULL);
950+
}
929951
}
930952
jl_value_t *inferred = jl_atomic_load_relaxed(&ci->inferred);
931953
if (inferred && inferred != jl_nothing) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized)
@@ -953,7 +975,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
953975
if (inferred == jl_nothing) {
954976
record_field_change((jl_value_t**)&ci->inferred, jl_nothing);
955977
}
956-
else if (jl_is_string(inferred)) {
978+
else if (s->incremental && jl_is_string(inferred)) {
957979
// New roots for external methods
958980
if (jl_object_in_image((jl_value_t*)def)) {
959981
void **pfound = ptrhash_bp(&s->method_roots_index, def);
@@ -2572,6 +2594,35 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
25722594
jl_svecset(cache, ins++, jl_nothing);
25732595
}
25742596

2597+
// Compact a method instance's backedge array in place, keeping only the edges
// whose caller has an entry in `serialization_order`.
//
// `backedges` may be NULL, in which case nothing is done. Edges are decoded
// with get_next_edge (which hands back the advanced read position) and the
// survivors are re-encoded at the front of the same array with set_next_edge
// (which hands back the advanced write position). Whatever remains past the
// write position is then trimmed off with jl_array_del_end.
static void jl_prune_mi_backedges(jl_array_t *backedges)
{
    if (!backedges)
        return;
    const size_t total = jl_array_nrows(backedges);
    size_t kept = 0;
    for (size_t cursor = 0; cursor < total; ) {
        jl_value_t *invokeTypes;
        jl_code_instance_t *caller;
        cursor = get_next_edge(backedges, cursor, &invokeTypes, &caller);
        // callers absent from serialization_order are dropped
        if (ptrhash_get(&serialization_order, caller) == HT_NOTFOUND)
            continue;
        kept = set_next_edge(backedges, kept, invokeTypes, caller);
    }
    jl_array_del_end(backedges, total - kept);
}
2611+
2612+
static void jl_prune_binding_backedges(jl_array_t *backedges)
2613+
{
2614+
if (backedges == NULL)
2615+
return;
2616+
size_t i = 0, ins = 0, n = jl_array_nrows(backedges);
2617+
for (i = 0; i < n; i++) {
2618+
jl_value_t *b = jl_array_ptr_ref(backedges, i);
2619+
if (ptrhash_get(&serialization_order, b) != HT_NOTFOUND)
2620+
jl_array_ptr_set(backedges, ins, b);
2621+
}
2622+
jl_array_del_end(backedges, n - ins);
2623+
}
2624+
2625+
25752626
uint_t bindingkey_hash(size_t idx, jl_value_t *data);
25762627

25772628
static void jl_prune_module_bindings(jl_module_t * m) JL_GC_DISABLED
@@ -3145,12 +3196,11 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
31453196
jl_queue_for_serialization(&s, global_roots_keyset);
31463197
jl_serialize_reachable(&s);
31473198
}
3148-
// step 1.5: prune (garbage collect) some special weak references from
3149-
// built-in type caches too
3199+
// step 1.5: prune (garbage collect) some special weak references from known caches
31503200
for (i = 0; i < serialization_queue.len; i++) {
31513201
jl_value_t *v = (jl_value_t*)serialization_queue.items[i];
31523202
if (jl_options.trim) {
3153-
if (jl_is_method(v)){
3203+
if (jl_is_method(v)) {
31543204
jl_method_t *m = (jl_method_t*)v;
31553205
jl_value_t *specializations_ = jl_atomic_load_relaxed(&m->specializations);
31563206
if (!jl_is_svec(specializations_))
@@ -3178,6 +3228,16 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
31783228
jl_gc_wb(tn, jl_atomic_load_relaxed(&tn->cache));
31793229
jl_prune_type_cache_linear(jl_atomic_load_relaxed(&tn->linearcache));
31803230
}
3231+
else if (jl_is_method_instance(v)) {
3232+
jl_method_instance_t *mi = (jl_method_instance_t*)v;
3233+
jl_value_t *backedges = get_replaceable_field((jl_value_t**)&mi->backedges, 1);
3234+
jl_prune_mi_backedges((jl_array_t*)backedges);
3235+
}
3236+
else if (jl_is_binding(v)) {
3237+
jl_binding_t *b = (jl_binding_t*)v;
3238+
jl_value_t *backedges = get_replaceable_field((jl_value_t**)&b->backedges, 1);
3239+
jl_prune_binding_backedges((jl_array_t*)backedges);
3240+
}
31813241
}
31823242
}
31833243

0 commit comments

Comments
 (0)