diff --git a/deps/mmtk_julia.mk b/deps/mmtk_julia.mk index 424113fd4164c..2c561b1b6a185 100644 --- a/deps/mmtk_julia.mk +++ b/deps/mmtk_julia.mk @@ -3,7 +3,7 @@ # Both MMTK_MOVING and MMTK_PLAN should be specified in the Make.user file. # At this point, since we only support non-moving this is always set to 0 # FIXME: change it to `?=` when introducing moving plans -MMTK_MOVING := 0 +MMTK_MOVING ?= 0 MMTK_VARS := MMTK_PLAN=$(MMTK_PLAN) MMTK_MOVING=$(MMTK_MOVING) ifneq ($(USE_BINARYBUILDER_MMTK_JULIA),1)
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index d91da9c64cda9..f38d3029184c9 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -72,6 +72,7 @@ typedef struct { SmallVector<GlobalValue*, 0> jl_sysimg_fvars; SmallVector<GlobalValue*, 0> jl_sysimg_gvars; std::map<jl_code_instance_t*, std::tuple<uint32_t, uint32_t>> jl_fvar_map; + // This holds references to the heap. They need to be pinned. SmallVector<jl_value_t*, 0> jl_value_to_llvm; SmallVector<jl_code_instance_t*, 0> jl_external_to_llvm; } jl_native_code_desc_t;
diff --git a/src/ast.c b/src/ast.c index 0f24d96393f2f..1e26e4fadf048 100644 --- a/src/ast.c +++ b/src/ast.c @@ -780,6 +780,9 @@ static value_t julia_to_list2_noalloc(fl_context_t *fl_ctx, jl_value_t *a, jl_va static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) { + // The following code will take internal pointers to v's fields. We need to make sure + // that v will not be moved by GC. + OBJ_PIN(v); value_t retval; if (julia_to_scm_noalloc1(fl_ctx, v, &retval)) return retval;
diff --git a/src/builtins.c b/src/builtins.c index f3d2dfad42819..860571c764ea1 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -344,6 +344,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN i++; pe = pe->prev; } + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + OBJ_PIN(v); uintptr_t bits = jl_astaggedvalue(v)->header; if (bits & GC_IN_IMAGE) return ((uintptr_t*)v)[-2]; @@ -400,6 +403,10 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT // a few select pointers (notably symbol) also have special hash values // which may affect the stability of the objectid hash, even though // they don't affect egal comparison + + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + PTR_PIN(v); // This has to be a pointer pin -- v could be an internal pointer return bits_hash(v, sz) ^ h; } if (dt == jl_unionall_type) @@ -460,6 +467,10 @@ static uintptr_t NOINLINE jl_object_id__cold(uintptr_t tv, jl_value_t *v) JL_NOT uintptr_t bits = jl_astaggedvalue(v)->header; if (bits & GC_IN_IMAGE) return ((uintptr_t*)v)[-2]; + + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + OBJ_PIN(v); return inthash((uintptr_t)v); } return immut_id_(dt, v, dt->hash);
diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 39879503596fe..71542e3f14a44 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -397,6 +397,7 @@ static Constant *julia_pgv(jl_codegen_params_t &params, Module *M, const char *c // emit a GlobalVariable for a jl_value_t named "cname" // store the name given so we can reuse it (facilitating merging later) // so first see if there already is a GlobalVariable for this address + OBJ_PIN(addr); // This will be stored in the native heap. We need to pin it.
GlobalVariable* &gv = params.global_targets[addr]; StringRef localname; std::string gvname; @@ -564,6 +565,8 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) { if (p == NULL) return Constant::getNullValue(ctx.types().T_pjlvalue); + // Pointers to p will be emitted into the code. Make sure p won't be moved by GC. + OBJ_PIN(p); Value *pgv = literal_pointer_val_slot(ctx.emission_context, jl_Module, p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto load = ai.decorateInst(maybe_mark_load_dereferenceable(
diff --git a/src/codegen.cpp b/src/codegen.cpp index 5507bd7bad801..64ddc60b61ce1 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1822,6 +1822,7 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext())); } jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, Value* inline_roots) = delete; @@ -1838,6 +1839,7 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. if (Vboxed) assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext())); assert(tbaa != nullptr); @@ -1858,6 +1860,8 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. + OBJ_PIN(constant); // jl_cgval_t could be in the native heap. We have to pin the object references in it. assert(jl_is_datatype(typ)); assert(constant); } @@ -1874,6 +1878,8 @@ struct jl_cgval_t { promotion_point(v.promotion_point), promotion_ssa(v.promotion_ssa) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. + OBJ_PIN(constant); // jl_cgval_t could be in the native heap. We have to pin the object references in it. if (Vboxed) assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext())); // this constructor expects we had a badly or equivalently typed version @@ -1946,6 +1952,7 @@ class jl_codectx_t { std::map<int, jl_varinfo_t> phic_slots; std::map<size_t, std::pair<Value*, Value*> > scope_restore; SmallVector<jl_cgval_t, 0> SAvalues; + // The vector holds references to Julia obj refs. We need to pin the jl_value_t*. SmallVector<std::tuple<jl_cgval_t, BasicBlock *, AllocaInst *, PHINode *, SmallVector<PHINode *, 0>, jl_value_t *>, 0> PhiNodes; SmallVector<bool, 0> ssavalue_assigned; SmallVector<int, 0> ssavalue_usecount; @@ -6240,6 +6247,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) decay_derived(ctx, phi)); jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, best_tbaa(ctx.tbaa(), phiType)); val.Vboxed = ptr_phi; + OBJ_PIN(r); // r will be saved to a data structure in the native heap, make sure it won't be moved by GC. ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, roots, r)); ctx.SAvalues[idx] = val; ctx.ssavalue_assigned[idx] = true; @@ -6249,6 +6257,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); Tindex_phi->insertInto(BB, InsertPt); jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); + OBJ_PIN(r); // r will be saved to a data structure in the native heap, make sure it won't be moved by GC.
ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)nullptr, roots, r)); ctx.SAvalues[idx] = val; ctx.ssavalue_assigned[idx] = true; @@ -6299,6 +6308,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) value_phi->insertInto(BB, InsertPt); slot = mark_julia_type(ctx, value_phi, isboxed, phiType); } + OBJ_PIN(r); // r will be saved to a data structure in the native heap, make sure it won't be moved by GC. ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, roots, r)); ctx.SAvalues[idx] = slot; ctx.ssavalue_assigned[idx] = true; diff --git a/src/datatype.c b/src/datatype.c index 240b1ace2295f..630972ff8b7da 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -62,7 +62,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu { jl_task_t *ct = jl_current_task; jl_typename_t *tn = - (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t), + (jl_typename_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_typename_t), jl_typename_type); tn->name = name; tn->module = module; @@ -95,7 +95,7 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_dat jl_datatype_t *jl_new_uninitialized_datatype(void) { jl_task_t *ct = jl_current_task; - jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); + jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); jl_set_typetagof(t, jl_datatype_tag, 0); t->hash = 0; t->hasfreetypevars = 0; diff --git a/src/gc-common.c b/src/gc-common.c index c07b707b17709..0816db696fdb0 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -540,6 +540,11 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) return jl_gc_alloc_(ptls, sz, ty); } +JL_DLLEXPORT jl_value_t *(jl_gc_alloc_nonmoving)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_nonmoving_(ptls, sz, ty); +} + JL_DLLEXPORT void *jl_malloc(size_t sz) { return jl_gc_counted_malloc(sz); diff --git a/src/gc-interface.h b/src/gc-interface.h index 618077b127803..a4d8f0561f35d 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -101,6 +101,10 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); // Returns whether the thread with `tid` is a collector thread JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; +// Pinning objects; Returns whether the object has been pinned by this call. +JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj); +// Pinning objects through a potential internal pointer; Returns whether the object has been pinned by this call. +JL_DLLEXPORT unsigned char jl_gc_pin_pointer(void* ptr); // Returns which GC implementation is being used and possibly its version according to the list of supported GCs // NB: it should clearly identify the GC by including e.g. ‘stock’ or ‘mmtk’ as a substring. JL_DLLEXPORT const char* jl_gc_active_impl(void); @@ -108,6 +112,16 @@ JL_DLLEXPORT const char* jl_gc_active_impl(void); // each GC should implement it but it will most likely not be used by other code in the runtime. // It still needs to be annotated with JL_DLLEXPORT since it is called from Rust by MMTk. JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT; +// Notifies the GC that the given thread is about to yield for a GC. ctx is the ucontext for the thread +// if it is already fetched by the caller, otherwise it is NULL. 
+JL_DLLEXPORT void jl_gc_notify_thread_yield(jl_ptls_t ptls, void* ctx); + +// TODO: The preserve hook functions may be temporary. We should see the performance impact of the change. + +// Runtime hook for gc preserve begin. The GC needs to make sure that the preserved objects and their children stay alive and won't move. +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT; +// Runtime hook for gc preserve end. The GC needs to make sure that the preserved objects and their children stay alive and won't move. +JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics // ========================================================================= // @@ -148,6 +162,9 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); // **must** also set the type of the returned object to be `ty`. The type `ty` may also be used to record // an allocation of that type in the allocation profiler. struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); +// Similar to jl_gc_alloc_, except that the GC needs to make sure the object allocated from this function will +// not be moved by the GC. +struct _jl_value_t *jl_gc_alloc_nonmoving_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); // Allocates small objects and increments Julia allocation counters. Size of the object // header must be included in the object size. The (possibly unused in some implementations) // offset to the arena in which we're allocating is passed in the second parameter, and the @@ -214,6 +231,10 @@ struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty, unsigned align) JL_NOTSAF // The GC may use that information to, for instance, determine that such objects should // be treated as marked and belong to the old generation in nursery collections. void jl_gc_notify_image_load(const char* img_data, size_t len); +// This function notifies the GC about memory addresses that are set when allocating the boot image. +// The GC may use that information to, for instance, determine that all objects in that chunk of memory should +// be treated as marked and belong to the old generation in nursery collections.
+void jl_gc_notify_image_alloc(const char* img_data, size_t len); // ========================================================================= // // Runtime Write-Barriers diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index a6650dd7cb68c..ef3e9c77a660d 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -40,19 +40,7 @@ static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); static memsize_t max_total_memory = (memsize_t) MAX32HEAP; #endif -// ========================================================================= // -// Defined by the binding -// ========================================================================= // -extern void mmtk_julia_copy_stack_check(int copy_stack); -extern void mmtk_gc_init(uintptr_t min_heap_size, uintptr_t max_heap_size, uintptr_t n_gcthreads, uintptr_t header_size, uintptr_t tag); -extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); -extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); -extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); -extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator); -extern void mmtk_store_obj_size_c(void* obj, size_t size); -extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; -extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS; // ========================================================================= // // GC Initialization and Control @@ -294,6 +282,8 @@ JL_DLLEXPORT void jl_gc_prepare_to_collect(void) gc_num.total_time_to_safepoint += duration; if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { + // This thread will yield. + jl_gc_notify_thread_yield(ptls, NULL); JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock #ifndef __clang_gcanalyzer__ mmtk_block_thread_for_gc(); @@ -323,6 +313,27 @@ JL_DLLEXPORT void jl_gc_prepare_to_collect(void) errno = last_errno; } +JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) { + return mmtk_pin_object(obj); +} + +JL_DLLEXPORT unsigned char jl_gc_pin_pointer(void* ptr) { + return mmtk_pin_pointer(ptr); +} + +JL_DLLEXPORT void jl_gc_notify_thread_yield(jl_ptls_t ptls, void* ctx) { + if (ctx == NULL) { + // Save the context for the thread as it was running at the time of the call + int r = getcontext(&ptls->gc_tls.ctx_at_the_time_gc_started); + if (r == -1) { + jl_safe_printf("Failed to save context for conservative scanning\n"); + abort(); + } + return; + } + memcpy(&ptls->gc_tls.ctx_at_the_time_gc_started, ctx, sizeof(ucontext_t)); +} + // ========================================================================= // // GC Statistics // ========================================================================= // @@ -493,35 +504,292 @@ static void add_node_to_tpinned_roots_buffer(RootsWorkClosure* closure, RootsWor } } -JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure) +// staticdata_utils.c +extern jl_array_t *internal_methods; +extern jl_array_t *newly_inferred; +// task.c +extern jl_function_t* task_done_hook_func; + +#define TRACE_GLOBALLY_ROOTED(r) add_node_to_roots_buffer(closure, buf, buf_len, r) + +// This is a list of global variables that are marked with JL_GLOBALLY_ROOTED. We need to make sure that they +// won't be moved. Otherwise, when we access those objects from the C global variables, we may see moved references. 
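+// [Illustrative sketch, not part of the patch.] TRACE_GLOBALLY_ROOTED expands to
+// add_node_to_roots_buffer(closure, buf, buf_len, r), which batches roots and hands
+// full buffers to MMTk through the closure. Roughly like the following; the
+// `ptr`/`cap` field names of RootsWorkBuffer and the flush condition are assumptions,
+// the actual definition lives earlier in this file and in the binding:
+//
+//     static void add_node_to_roots_buffer(RootsWorkClosure* closure, RootsWorkBuffer* buf,
+//                                          size_t* buf_len, void* root)
+//     {
+//         if (root == NULL)
+//             return;
+//         buf->ptr[*buf_len] = root;     // append the root to the current buffer
+//         *buf_len += 1;
+//         if (*buf_len >= buf->cap) {    // buffer full: report it and start a new one
+//             *buf = (closure->report_nodes_func)(buf->ptr, *buf_len, buf->cap, closure->data, false);
+//             *buf_len = 0;
+//         }
+//     }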
+void trace_full_globally_rooted(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len) { - // Create a new buf - RootsWorkBuffer buf = (closure->report_nodes_func)((void**)0, 0, 0, closure->data, true); - size_t len = 0; + TRACE_GLOBALLY_ROOTED(cmpswap_names); + TRACE_GLOBALLY_ROOTED(jl_typeinf_func); + TRACE_GLOBALLY_ROOTED(_jl_debug_method_invalidation); + // Max 4096 + for (size_t i = 0; i < N_CALL_CACHE; i++) { + TRACE_GLOBALLY_ROOTED(call_cache[i]); + } + // julia_internal.h + TRACE_GLOBALLY_ROOTED(jl_type_type_mt); + TRACE_GLOBALLY_ROOTED(jl_nonfunction_mt); + TRACE_GLOBALLY_ROOTED(jl_kwcall_mt); + TRACE_GLOBALLY_ROOTED(jl_opaque_closure_method); + TRACE_GLOBALLY_ROOTED(jl_nulldebuginfo); + TRACE_GLOBALLY_ROOTED(_jl_debug_method_invalidation); + TRACE_GLOBALLY_ROOTED(jl_module_init_order); + // TRACE_GLOBALLY_ROOTED(jl_current_modules); -- we cannot trace a htable_t. So we trace each module. + for (size_t i = 0; i < jl_current_modules.size; i += 2) { + if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { + TRACE_GLOBALLY_ROOTED(jl_current_modules.table[i]); + } + } + for (size_t i = 0; i < N_CALL_CACHE; i++) { + jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); + TRACE_GLOBALLY_ROOTED(v); + } + TRACE_GLOBALLY_ROOTED(jl_precompile_toplevel_module); + TRACE_GLOBALLY_ROOTED(jl_global_roots_list); + TRACE_GLOBALLY_ROOTED(jl_global_roots_keyset); + TRACE_GLOBALLY_ROOTED(precompile_field_replace); + // julia.h + TRACE_GLOBALLY_ROOTED(jl_typeofbottom_type); + TRACE_GLOBALLY_ROOTED(jl_datatype_type); + TRACE_GLOBALLY_ROOTED(jl_uniontype_type); + TRACE_GLOBALLY_ROOTED(jl_unionall_type); + TRACE_GLOBALLY_ROOTED(jl_tvar_type); + + TRACE_GLOBALLY_ROOTED(jl_any_type); + TRACE_GLOBALLY_ROOTED(jl_type_type); + TRACE_GLOBALLY_ROOTED(jl_typename_type); + TRACE_GLOBALLY_ROOTED(jl_type_typename); + TRACE_GLOBALLY_ROOTED(jl_symbol_type); + TRACE_GLOBALLY_ROOTED(jl_ssavalue_type); + TRACE_GLOBALLY_ROOTED(jl_slotnumber_type); + TRACE_GLOBALLY_ROOTED(jl_argument_type); + TRACE_GLOBALLY_ROOTED(jl_const_type); + TRACE_GLOBALLY_ROOTED(jl_partial_struct_type); + TRACE_GLOBALLY_ROOTED(jl_partial_opaque_type); + TRACE_GLOBALLY_ROOTED(jl_interconditional_type); + TRACE_GLOBALLY_ROOTED(jl_method_match_type); + TRACE_GLOBALLY_ROOTED(jl_simplevector_type); + TRACE_GLOBALLY_ROOTED(jl_tuple_typename); + TRACE_GLOBALLY_ROOTED(jl_vecelement_typename); + TRACE_GLOBALLY_ROOTED(jl_anytuple_type); + TRACE_GLOBALLY_ROOTED(jl_emptytuple_type); + TRACE_GLOBALLY_ROOTED(jl_anytuple_type_type); + TRACE_GLOBALLY_ROOTED(jl_vararg_type); + TRACE_GLOBALLY_ROOTED(jl_function_type); + TRACE_GLOBALLY_ROOTED(jl_builtin_type); + TRACE_GLOBALLY_ROOTED(jl_opaque_closure_type); + TRACE_GLOBALLY_ROOTED(jl_opaque_closure_typename); + + TRACE_GLOBALLY_ROOTED(jl_bottom_type); + TRACE_GLOBALLY_ROOTED(jl_method_instance_type); + TRACE_GLOBALLY_ROOTED(jl_code_instance_type); + TRACE_GLOBALLY_ROOTED(jl_code_info_type); + TRACE_GLOBALLY_ROOTED(jl_debuginfo_type); + TRACE_GLOBALLY_ROOTED(jl_method_type); + TRACE_GLOBALLY_ROOTED(jl_module_type); + TRACE_GLOBALLY_ROOTED(jl_addrspace_type); + TRACE_GLOBALLY_ROOTED(jl_addrspacecore_type); + TRACE_GLOBALLY_ROOTED(jl_abstractarray_type); + TRACE_GLOBALLY_ROOTED(jl_densearray_type); + TRACE_GLOBALLY_ROOTED(jl_array_type); + TRACE_GLOBALLY_ROOTED(jl_array_typename); + TRACE_GLOBALLY_ROOTED(jl_genericmemory_type); + TRACE_GLOBALLY_ROOTED(jl_genericmemory_typename); + TRACE_GLOBALLY_ROOTED(jl_genericmemoryref_type); + TRACE_GLOBALLY_ROOTED(jl_genericmemoryref_typename); + 
TRACE_GLOBALLY_ROOTED(jl_weakref_type); + TRACE_GLOBALLY_ROOTED(jl_abstractstring_type); + TRACE_GLOBALLY_ROOTED(jl_string_type); + TRACE_GLOBALLY_ROOTED(jl_errorexception_type); + TRACE_GLOBALLY_ROOTED(jl_argumenterror_type); + TRACE_GLOBALLY_ROOTED(jl_loaderror_type); + TRACE_GLOBALLY_ROOTED(jl_initerror_type); + TRACE_GLOBALLY_ROOTED(jl_typeerror_type); + TRACE_GLOBALLY_ROOTED(jl_methoderror_type); + TRACE_GLOBALLY_ROOTED(jl_undefvarerror_type); + TRACE_GLOBALLY_ROOTED(jl_fielderror_type); + TRACE_GLOBALLY_ROOTED(jl_atomicerror_type); + TRACE_GLOBALLY_ROOTED(jl_missingcodeerror_type); + TRACE_GLOBALLY_ROOTED(jl_lineinfonode_type); + TRACE_GLOBALLY_ROOTED(jl_stackovf_exception); + TRACE_GLOBALLY_ROOTED(jl_memory_exception); + TRACE_GLOBALLY_ROOTED(jl_readonlymemory_exception); + TRACE_GLOBALLY_ROOTED(jl_diverror_exception); + TRACE_GLOBALLY_ROOTED(jl_undefref_exception); + TRACE_GLOBALLY_ROOTED(jl_interrupt_exception); + TRACE_GLOBALLY_ROOTED(jl_precompilable_error); + TRACE_GLOBALLY_ROOTED(jl_boundserror_type); + TRACE_GLOBALLY_ROOTED(jl_an_empty_vec_any); + TRACE_GLOBALLY_ROOTED(jl_an_empty_memory_any); + TRACE_GLOBALLY_ROOTED(jl_an_empty_string); + + TRACE_GLOBALLY_ROOTED(jl_bool_type); + TRACE_GLOBALLY_ROOTED(jl_char_type); + TRACE_GLOBALLY_ROOTED(jl_int8_type); + TRACE_GLOBALLY_ROOTED(jl_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_int16_type); + TRACE_GLOBALLY_ROOTED(jl_uint16_type); + TRACE_GLOBALLY_ROOTED(jl_int32_type); + TRACE_GLOBALLY_ROOTED(jl_uint32_type); + TRACE_GLOBALLY_ROOTED(jl_int64_type); + TRACE_GLOBALLY_ROOTED(jl_uint64_type); + TRACE_GLOBALLY_ROOTED(jl_float16_type); + TRACE_GLOBALLY_ROOTED(jl_float32_type); + TRACE_GLOBALLY_ROOTED(jl_float64_type); + TRACE_GLOBALLY_ROOTED(jl_floatingpoint_type); + TRACE_GLOBALLY_ROOTED(jl_number_type); + TRACE_GLOBALLY_ROOTED(jl_void_type); // deprecated + TRACE_GLOBALLY_ROOTED(jl_nothing_type); + TRACE_GLOBALLY_ROOTED(jl_signed_type); + TRACE_GLOBALLY_ROOTED(jl_voidpointer_type); + TRACE_GLOBALLY_ROOTED(jl_uint8pointer_type); + TRACE_GLOBALLY_ROOTED(jl_pointer_type); + TRACE_GLOBALLY_ROOTED(jl_llvmpointer_type); + TRACE_GLOBALLY_ROOTED(jl_ref_type); + TRACE_GLOBALLY_ROOTED(jl_pointer_typename); + TRACE_GLOBALLY_ROOTED(jl_llvmpointer_typename); + TRACE_GLOBALLY_ROOTED(jl_namedtuple_typename); + TRACE_GLOBALLY_ROOTED(jl_namedtuple_type); + TRACE_GLOBALLY_ROOTED(jl_task_type); + TRACE_GLOBALLY_ROOTED(jl_pair_type); + + TRACE_GLOBALLY_ROOTED(jl_array_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_array_any_type); + TRACE_GLOBALLY_ROOTED(jl_array_symbol_type); + TRACE_GLOBALLY_ROOTED(jl_array_int32_type); + TRACE_GLOBALLY_ROOTED(jl_array_uint32_type); + TRACE_GLOBALLY_ROOTED(jl_array_uint64_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint16_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint32_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint64_type); + TRACE_GLOBALLY_ROOTED(jl_memory_any_type); + TRACE_GLOBALLY_ROOTED(jl_memoryref_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_memoryref_any_type); + TRACE_GLOBALLY_ROOTED(jl_expr_type); + TRACE_GLOBALLY_ROOTED(jl_binding_type); + TRACE_GLOBALLY_ROOTED(jl_binding_partition_type); + TRACE_GLOBALLY_ROOTED(jl_globalref_type); + TRACE_GLOBALLY_ROOTED(jl_linenumbernode_type); + TRACE_GLOBALLY_ROOTED(jl_gotonode_type); + TRACE_GLOBALLY_ROOTED(jl_gotoifnot_type); + TRACE_GLOBALLY_ROOTED(jl_enternode_type); + TRACE_GLOBALLY_ROOTED(jl_returnnode_type); + TRACE_GLOBALLY_ROOTED(jl_phinode_type); + TRACE_GLOBALLY_ROOTED(jl_pinode_type); + TRACE_GLOBALLY_ROOTED(jl_phicnode_type); + 
TRACE_GLOBALLY_ROOTED(jl_upsilonnode_type); + TRACE_GLOBALLY_ROOTED(jl_quotenode_type); + TRACE_GLOBALLY_ROOTED(jl_newvarnode_type); + TRACE_GLOBALLY_ROOTED(jl_intrinsic_type); + TRACE_GLOBALLY_ROOTED(jl_methtable_type); + TRACE_GLOBALLY_ROOTED(jl_typemap_level_type); + TRACE_GLOBALLY_ROOTED(jl_typemap_entry_type); + + TRACE_GLOBALLY_ROOTED(jl_emptysvec); + TRACE_GLOBALLY_ROOTED(jl_emptytuple); + TRACE_GLOBALLY_ROOTED(jl_true); + TRACE_GLOBALLY_ROOTED(jl_false); + TRACE_GLOBALLY_ROOTED(jl_nothing); + TRACE_GLOBALLY_ROOTED(jl_kwcall_func); + + TRACE_GLOBALLY_ROOTED(jl_libdl_dlopen_func); + + TRACE_GLOBALLY_ROOTED(jl_main_module); + TRACE_GLOBALLY_ROOTED(jl_core_module); + TRACE_GLOBALLY_ROOTED(jl_base_module); + TRACE_GLOBALLY_ROOTED(jl_top_module); + TRACE_GLOBALLY_ROOTED(jl_libdl_module); + + // staticdata_utils.c + TRACE_GLOBALLY_ROOTED(internal_methods); + TRACE_GLOBALLY_ROOTED(newly_inferred); + // task.c + TRACE_GLOBALLY_ROOTED(task_done_hook_func); + // threading.c + // TRACE_GLOBALLY_ROOTED(jl_all_tls_states); -- we don't need to pin these. Julia TLS are allocated with calloc. +} + +// These are from gc_mark_roots -- this is not enough for a moving GC. We need to make sure +// all the globally rooted symbols are traced and will not move. This function is unused. +// We use trace_full_globally_rooted() instead. +void trace_partial_globally_rooted(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len) +{ // add module - add_node_to_roots_buffer(closure, &buf, &len, jl_main_module); + TRACE_GLOBALLY_ROOTED(jl_main_module); // builtin values - add_node_to_roots_buffer(closure, &buf, &len, jl_an_empty_vec_any); - add_node_to_roots_buffer(closure, &buf, &len, jl_module_init_order); + TRACE_GLOBALLY_ROOTED(jl_an_empty_vec_any); + TRACE_GLOBALLY_ROOTED(jl_module_init_order); for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { - add_node_to_roots_buffer(closure, &buf, &len, jl_current_modules.table[i]); + TRACE_GLOBALLY_ROOTED(jl_current_modules.table[i]); } } - add_node_to_roots_buffer(closure, &buf, &len, jl_anytuple_type_type); + TRACE_GLOBALLY_ROOTED(jl_anytuple_type_type); for (size_t i = 0; i < N_CALL_CACHE; i++) { - jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); - add_node_to_roots_buffer(closure, &buf, &len, v); + jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); + TRACE_GLOBALLY_ROOTED(v); } - add_node_to_roots_buffer(closure, &buf, &len, _jl_debug_method_invalidation); + TRACE_GLOBALLY_ROOTED(_jl_debug_method_invalidation); // constants - add_node_to_roots_buffer(closure, &buf, &len, jl_emptytuple_type); - add_node_to_roots_buffer(closure, &buf, &len, cmpswap_names); + TRACE_GLOBALLY_ROOTED(jl_emptytuple_type); + TRACE_GLOBALLY_ROOTED(cmpswap_names); + TRACE_GLOBALLY_ROOTED(jl_global_roots_list); + TRACE_GLOBALLY_ROOTED(jl_global_roots_keyset); + TRACE_GLOBALLY_ROOTED(precompile_field_replace); +} + +JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure) +{ + // Create a new buf + RootsWorkBuffer buf = (closure->report_nodes_func)((void**)0, 0, 0, closure->data, true); + size_t len = 0; + + // globally rooted + trace_full_globally_rooted(closure, &buf, &len); + + // Simply pin things in global roots table + // size_t i; + // for (i = 0; i < jl_array_len(jl_global_roots_table); i++) { + // jl_value_t* root = jl_array_ptr_ref(jl_global_roots_table, i); + // add_node_to_roots_buffer(closure, &buf, &len, root); + // } + // for (i = 0; i <
jl_global_roots_list->length; i++) { + // jl_value_t* root = jl_genericmemory_ptr_ref(jl_global_roots_list, i); + // add_node_to_roots_buffer(closure, &buf, &len, root); + // } + // for (i = 0; i < jl_global_roots_keyset->length; i++) { + // jl_value_t* root = jl_genericmemory_ptr_ref(jl_global_roots_keyset, i); + // add_node_to_roots_buffer(closure, &buf, &len, root); + // } + // add_node_to_roots_buffer(closure, &buf, &len, jl_global_roots_list); + // add_node_to_roots_buffer(closure, &buf, &len, jl_global_roots_keyset); + + // // add module + // add_node_to_roots_buffer(closure, &buf, &len, jl_main_module); + + // // buildin values + // add_node_to_roots_buffer(closure, &buf, &len, jl_an_empty_vec_any); + // add_node_to_roots_buffer(closure, &buf, &len, jl_module_init_order); + // for (size_t i = 0; i < jl_current_modules.size; i += 2) { + // if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { + // add_node_to_roots_buffer(closure, &buf, &len, jl_current_modules.table[i]); + // } + // } + // add_node_to_roots_buffer(closure, &buf, &len, jl_anytuple_type_type); + // for (size_t i = 0; i < N_CALL_CACHE; i++) { + // jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); + // add_node_to_roots_buffer(closure, &buf, &len, v); + // } + // add_node_to_roots_buffer(closure, &buf, &len, _jl_debug_method_invalidation); + + // // constants + // add_node_to_roots_buffer(closure, &buf, &len, jl_emptytuple_type); + // add_node_to_roots_buffer(closure, &buf, &len, cmpswap_names); + // add_node_to_roots_buffer(closure, &buf, &len, precompile_field_replace); // jl_global_roots_table must be transitively pinned + // FIXME: We need to remove transitive pinning of global roots. Otherwise they may pin most of the objects in the heap. RootsWorkBuffer tpinned_buf = (closure->report_tpinned_nodes_func)((void**)0, 0, 0, closure->data, true); size_t tpinned_len = 0; add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_list); @@ -786,6 +1054,8 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu { jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here + // Note: we are using MMTk's weak ref processing. If we switch to Julia's weak ref processing, + // we need to make sure the value and the weak ref won't be moved (e.g. 
pin them) mmtk_add_weak_candidate(wr); return wr; } @@ -842,18 +1112,28 @@ STATIC_INLINE void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* curs } } +STATIC_INLINE void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) { + intptr_t addr = (intptr_t) obj; + uint8_t* meta_addr = (uint8_t*) side_metadata_base + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + while (1) { + uint8_t old_val = *meta_addr; + uint8_t new_val = old_val | (1 << shift); + if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { + break; + } + } +} + STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0); } -inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) { - mmtk_post_alloc(mutator, obj, size, 0); -} - STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - // FIXME: for now, we do nothing - // but when supporting moving, this is where we set the valid object (VO) bit + if (MMTK_NEEDS_VO_BIT) { + mmtk_set_side_metadata(MMTK_SIDE_VO_BIT_BASE_ADDRESS, obj); + } } STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { @@ -862,9 +1142,9 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t } STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - // FIXME: Similarly, for now, we do nothing - // but when supporting moving, this is where we set the valid object (VO) bit - // and log (old gen) bit + if (MMTK_NEEDS_VO_BIT) { + mmtk_set_side_metadata(MMTK_SIDE_VO_BIT_BASE_ADDRESS, obj); + } } JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty) @@ -963,6 +1243,15 @@ inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) return v; } +inline jl_value_t *jl_gc_alloc_nonmoving_(jl_ptls_t ptls, size_t sz, void *ty) +{ + // TODO: Currently we just alloc and pin the object. We may use a + // different non-moving allocator instead. + jl_value_t *v = jl_gc_alloc_(ptls, sz, ty); + OBJ_PIN(v); + return v; +} + // allocation wrappers that track allocation and let collection run JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { @@ -1044,6 +1333,16 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty, unsigned align) JL_NOTSAFEPOINT return jl_valueof(o); } +jl_value_t *jl_gc_permsymbol(size_t sz) JL_NOTSAFEPOINT +{ + jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(sz, 0, sizeof(void*), 0); + jl_value_t *sym = jl_valueof(tag); + jl_ptls_t ptls = jl_current_task->ptls; + jl_set_typetagof(sym, jl_symbol_tag, 0); // We need to set the symbol tag. The GC tag doesn't matter.
+ mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, sym, sz); + return sym; +} + JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) { jl_ptls_t ptls = jl_current_task->ptls; @@ -1081,6 +1380,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) mmtk_set_vm_space((void*)img_data, len); } +void jl_gc_notify_image_alloc(const char* img_data, size_t len) +{ + mmtk_immortal_region_post_alloc((void*)img_data, len); +} + // ========================================================================= // // Code specific to stock that is not supported by MMTk // ========================================================================= // @@ -1210,6 +1514,53 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) return NULL; } +#define jl_p_gcpreserve_stack (jl_current_task->gcpreserve_stack) + +// This macro currently uses malloc instead of alloca because this function will exit +// after pushing the roots into the gc_preserve_stack, which means that the preserve_begin function's +// stack frame will be destroyed (together with its alloca variables). When we support lowering this code +// inside the same function that is doing the preserve_begin/preserve_end calls, we should be able to simply use allocas. +// Note also that we use a separate stack for gc preserve roots to avoid the possibility of calling free +// on a stack that has been allocated with alloca instead of malloc, which could happen depending on the order in which +// JL_GC_POP() and jl_gc_preserve_end_hook() occur. + +#define JL_GC_PUSHARGS_PRESERVE_ROOT_OBJS(rts_var,n) \ + rts_var = ((jl_value_t**)malloc(((n)+2)*sizeof(jl_value_t*)))+2; \ + ((void**)rts_var)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(n); \ + ((void**)rts_var)[-1] = jl_p_gcpreserve_stack; \ + memset((void*)rts_var, 0, (n)*sizeof(jl_value_t*)); \ + jl_p_gcpreserve_stack = (jl_gcframe_t*)&(((void**)rts_var)[-2]); \ + +#define JL_GC_POP_PRESERVE_ROOT_OBJS() \ + jl_gcframe_t *curr = jl_p_gcpreserve_stack; \ + if(curr) { \ + (jl_p_gcpreserve_stack = jl_p_gcpreserve_stack->prev); \ + free(curr); \ + } + +// Add each argument as a tpin root object. +// However, we cannot use JL_GC_PUSH and JL_GC_POP since the slots should live +// beyond this function. Instead, we maintain a tpin stack by mallocing/freeing +// the frames for each of the preserve regions we encounter +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT +{ + jl_value_t** frame; + JL_GC_PUSHARGS_PRESERVE_ROOT_OBJS(frame, n); + if (n == 0) return; + + va_list args; + va_start(args, n); + for (int i = 0; i < n; i++) { + frame[i] = va_arg(args, jl_value_t *); + } + va_end(args); +} + +JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT +{ + JL_GC_POP_PRESERVE_ROOT_OBJS(); +} + #ifdef __cplusplus } #endif
diff --git a/src/gc-stock.c b/src/gc-stock.c index d0f0baf4f4715..9148a22fb09f5 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -806,6 +806,12 @@ inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) return v; } +inline jl_value_t *jl_gc_alloc_nonmoving_(jl_ptls_t ptls, size_t sz, void *ty) +{ + // Just use the normal allocation, as the GC won't move objects anyway. + return jl_gc_alloc_(ptls, sz, ty); +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -3461,6 +3467,11 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) gc_cblist_pre_gc, (collection)); if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { + // This thread will yield.
+ // jl_gc_notify_thread_yield does nothing for the stock GC at this point, but it may be non-empty in the future, + // and this is a place where we should call jl_gc_notify_thread_yield. + // TODO: This call can be removed if requested. + jl_gc_notify_thread_yield(ptls, NULL); JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock #ifndef __clang_gcanalyzer__ if (_jl_gc_collect(ptls, collection)) { @@ -3945,6 +3956,15 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty, unsigned align) JL_NOTSAFEPOINT return v; } +jl_value_t *jl_gc_permsymbol(size_t sz) JL_NOTSAFEPOINT +{ + jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(sz, 0, sizeof(void*), 0); + jl_value_t *sym = jl_valueof(tag); + // set to old marked so that we won't look at it in the GC or write barrier. + jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); + return sym; +} + JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4076,15 +4096,44 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } +// added for MMTk integration + void jl_gc_notify_image_load(const char* img_data, size_t len) { // Do nothing } +void jl_gc_notify_image_alloc(const char* img_data, size_t len) +{ + // Do nothing +} + JL_DLLEXPORT const char* jl_gc_active_impl(void) { return "Built with stock GC"; } +JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) { + return 0; +} + +JL_DLLEXPORT unsigned char jl_gc_pin_pointer(void* ptr) { + return 0; +} + +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT +{ + jl_unreachable(); +} + +JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT +{ + jl_unreachable(); +} + +JL_DLLEXPORT void jl_gc_notify_thread_yield(jl_ptls_t ptls, void* ctx) { + // Do nothing before a thread yields +} + #ifdef __cplusplus } #endif
diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h index 5b69aef5d55fb..3a7f88980589d 100644 --- a/src/gc-tls-mmtk.h +++ b/src/gc-tls-mmtk.h @@ -14,6 +14,7 @@ extern "C" { typedef struct { MMTkMutatorContext mmtk_mutator; _Atomic(size_t) malloc_sz_since_last_poll; + ucontext_t ctx_at_the_time_gc_started; } jl_gc_tls_states_t; #ifdef __cplusplus
diff --git a/src/genericmemory.c b/src/genericmemory.c index e435ec3b63c9f..1a576f9f44763 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -41,6 +41,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls m = (jl_genericmemory_t*)jl_gc_alloc(ptls, tot, mtype); if (pooled) { data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; + // Data is inlined and ptr is an internal pointer. We pin the object so the ptr will not become invalid. + OBJ_PIN(m); } else { int isaligned = 1; // jl_gc_managed_malloc is always aligned @@ -111,6 +113,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str) m->length = jl_string_len(str); m->ptr = jl_string_data(str); jl_genericmemory_data_owner_field(m) = str; + OBJ_PIN(str); return m; } @@ -166,6 +169,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void m->length = nel; jl_genericmemory_data_owner_field(m) = own_buffer ?
(jl_value_t*)m : NULL; if (own_buffer) { + OBJ_PIN(m); int isaligned = 0; // TODO: allow passing memalign'd buffers jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); size_t allocated_bytes = memory_block_usable_size(data, isaligned);
diff --git a/src/gf.c b/src/gf.c index 82e1e43333eb4..e3b1ff37983e5 100644 --- a/src/gf.c +++ b/src/gf.c @@ -620,7 +620,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( assert(min_world <= max_world && "attempting to set invalid world constraints"); //assert((!jl_is_method(mi->def.value) || max_world != ~(size_t)0 || min_world <= 1 || edges == NULL || jl_svec_len(edges) != 0) && "missing edges"); jl_task_t *ct = jl_current_task; - jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_instance_t), + jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); codeinst->def = (jl_value_t*)mi; codeinst->owner = owner;
diff --git a/src/interpreter.c b/src/interpreter.c index 513fe58f7b5cc..755cbfd592ed3 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -52,7 +52,14 @@ extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT; #else +#ifdef MMTK_GC +#define JL_GC_ENCODE_PUSHFRAME(n) ((((size_t)(n))<<3)|2) +// For roots that are not transitively pinned +#define JL_GC_ENCODE_PUSHFRAME_NO_TPIN(n) ((((size_t)(n))<<3)|6) +#else #define JL_GC_ENCODE_PUSHFRAME(n) ((((size_t)(n))<<2)|2) +#define JL_GC_ENCODE_PUSHFRAME_NO_TPIN(n) JL_GC_ENCODE_PUSHFRAME(n) +#endif #define JL_GC_PUSHFRAME(frame,locals,n) \ JL_CPPALLOCA(frame, sizeof(*frame)+(((n)+3)*sizeof(jl_value_t*))); \
diff --git a/src/ircode.c b/src/ircode.c index 99c5833ac3be7..05fc3eef6fe7b 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -1661,12 +1661,15 @@ void jl_init_serializer(void) assert(LAST_TAG+1+i < 256); for (i = 2; i < 256; i++) { - if (deser_tag[i]) + if (deser_tag[i]) { + OBJHASH_PIN(deser_tag[i]) ptrhash_put(&ser_tag, deser_tag[i], (void*)i); + } } i = 2; while (common_symbols[i-2] != NULL) { + OBJHASH_PIN(common_symbols[i-2]) ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; i += 1;
diff --git a/src/jitlayers.h b/src/jitlayers.h index 139137d0ca477..ff63b0f344153 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -238,6 +238,7 @@ struct jl_codegen_params_t { // outputs jl_workqueue_t workqueue; SmallVector<cfunc_decl_t, 0> cfuncs; + // This map may hold Julia obj ref in the native heap. We need to pin the void*. std::map<void*, GlobalVariable*> global_targets; jl_array_t *temporary_roots = nullptr; std::map<std::tuple<jl_code_instance_t*, bool>, GlobalVariable*> external_fns; @@ -335,6 +336,7 @@ Constant *literal_pointer_val_slot(jl_codegen_params_t &params, Module *M, jl_va static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT { + PTR_PIN((void*)p); // This may point to non-mmtk heap memory.
// this function will emit a static pointer into the generated code // the generated code will only be valid during the current session, // and thus, this should typically be avoided in new API's
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 664e1270c7381..431ad4ef6c886 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -181,6 +181,8 @@ XX(jl_gc_set_max_memory) \ XX(jl_gc_sync_total_bytes) \ XX(jl_gc_total_hrtime) \ + XX(jl_gc_preserve_begin_hook) \ + XX(jl_gc_preserve_end_hook) \ XX(jl_gdblookup) \ XX(jl_generating_output) \ XX(jl_declare_const_gf) \
diff --git a/src/jl_uv.c b/src/jl_uv.c index 3498952622dce..3ab1961457918 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -469,6 +469,7 @@ JL_DLLEXPORT void jl_forceclose_uv(uv_handle_t *handle) JL_DLLEXPORT void jl_uv_associate_julia_struct(uv_handle_t *handle, jl_value_t *data) { + OBJ_PIN(data); handle->data = data; } @@ -479,6 +480,7 @@ JL_DLLEXPORT void jl_uv_associate_julia_struct(uv_handle_t *handle, */ JL_DLLEXPORT void jl_uv_disassociate_julia_struct(uv_handle_t *handle) { + // TODO: unpin here -- we need to implement pin count before we can unpin objects. handle->data = NULL; }
diff --git a/src/julia.h b/src/julia.h index dd3bc713a517f..52b055d9033a3 100644 --- a/src/julia.h +++ b/src/julia.h @@ -77,6 +77,13 @@ typedef struct _jl_tls_states_t *jl_ptls_t; // the common fields are hidden before the pointer, but the following macro is // used to indicate which types below are subtypes of jl_value_t #define JL_DATA_TYPE +// Objects of a type that is JL_NON_MOVING should be allocated with +// jl_gc_alloc_nonmoving so they will never be moved by GC. +// Those types are usually frequently referenced by the runtime. +// It is basically a trade-off between allocating the objects as non-moving +// and pinning the objects after allocation. If objects of certain types are +// most likely to be pinned, it is a good idea to just allocate them as non-moving. +#define JL_NON_MOVING typedef struct _jl_value_t jl_value_t; #include "julia_threads.h" @@ -84,6 +91,19 @@ typedef struct _jl_value_t jl_value_t; extern "C" { #endif +// object pinning ------------------------------------------------------------ + +// FIXME: Pinning objects that get hashed in the ptrhash table +// until we implement address space hashing. +#define OBJHASH_PIN(key) if (key) jl_gc_pin_object(key); +#define PTRHASH_PIN(key) if (key) jl_gc_pin_pointer(key); + +// Called when pinning objects that would cause an error if moved. +// The difference: the argument for pin_object needs to be a pointer to an object (jl_value_t*), +// but the argument for pin_pointer can be an internal pointer.
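+// [Illustrative sketch, not part of the patch.] A hypothetical helper showing the
+// distinction; jl_svec_data yields a pointer into the middle of the svec object,
+// so it must go through the pointer pin rather than the object pin:
+//
+//     static void pin_example(jl_svec_t *sv)
+//     {
+//         OBJ_PIN(sv);                          // base pointer: object pin suffices
+//         jl_value_t **slot = jl_svec_data(sv); // interior pointer into sv
+//         PTR_PIN(slot);                        // interior pointer: needs jl_gc_pin_pointer
+//     }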
+#define OBJ_PIN(key) if (key) jl_gc_pin_object(key); +#define PTR_PIN(key) if (key) jl_gc_pin_pointer(key); + // core data types ------------------------------------------------------------ struct _jl_taggedvalue_bits { @@ -391,6 +411,7 @@ typedef struct _jl_method_t { // can be used as a unique dictionary key representation of a call to a particular Method // with a particular set of argument types struct _jl_method_instance_t { + JL_NON_MOVING // Non-moving, as it is referenced in a map in JITDebugInfoRegistry JL_DATA_TYPE union { jl_value_t *value; // generic accessor @@ -423,6 +444,7 @@ typedef struct _jl_opaque_closure_t { // This type represents an executable operation typedef struct _jl_code_instance_t { + JL_NON_MOVING // Pin codeinst, as they are referenced by vectors and maps in _jl_codegen_params_t JL_DATA_TYPE jl_value_t *def; // MethodInstance or ABIOverride jl_value_t *owner; // Compiler token this belongs to, `jl_nothing` is reserved for native @@ -502,6 +524,7 @@ typedef struct { // of a type and storing all data common to different instantiations of the type, // including a cache for hash-consed allocation of DataType objects. typedef struct { + JL_NON_MOVING // Typenames should be pinned since they are used as metadata, and are read during scan_object JL_DATA_TYPE jl_sym_t *name; struct _jl_module_t *module; @@ -582,6 +605,7 @@ typedef struct { } jl_datatype_layout_t; typedef struct _jl_datatype_t { + JL_NON_MOVING // Types should not be moved. It is also referenced from the native heap in jl_raw_alloc_t. JL_DATA_TYPE jl_typename_t *name; struct _jl_datatype_t *super; @@ -753,6 +777,7 @@ typedef struct { } jl_uuid_t; typedef struct _jl_module_t { + JL_NON_MOVING // Modules are referenced in jl_current_modules (htable). They cannot move. JL_DATA_TYPE jl_sym_t *name; struct _jl_module_t *parent; @@ -1093,8 +1118,45 @@ struct _jl_gcframe_t { #define jl_pgcstack (jl_current_task->gcstack) +#ifndef MMTK_GC #define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<2) #define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<2)|1) +#define JL_GC_DECODE_NROOTS(n) (n >> 2) + +#define JL_GC_ENCODE_PUSHARGS_NO_TPIN(n) JL_GC_ENCODE_PUSHARGS(n) +#define JL_GC_ENCODE_PUSH_NO_TPIN(n) JL_GC_ENCODE_PUSH(n) + +#else + +// VO bit is required to support conservative stack scanning and moving. +#define MMTK_NEEDS_VO_BIT (1) + +// We use an extra bit (100) in the nroots value from the frame to indicate that the roots +// in the frame are/are not transitively pinning. +// There are currently 3 macros that encode passing nroots to the gcframe +// and they use the two lowest bits to encode information about what is in the frame (as below). +// To support the distinction between transitively pinning roots and non-transitively pinning roots +// on the stack, we take another bit from nroots to encode information about whether or not to +// transitively pin the roots in the frame.
+// +// So the ones that transitively pin look like: +// #define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<3) +// #define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<3)|1) +// #define JL_GC_ENCODE_PUSHFRAME(n) ((((size_t)(n))<<3)|2) +// and the ones that do not look like: +// #define JL_GC_ENCODE_PUSHARGS_NO_TPIN(n) (((size_t)(n))<<3|4) +// #define JL_GC_ENCODE_PUSH_NO_TPIN(n) ((((size_t)(n))<<3)|5) +// #define JL_GC_ENCODE_PUSHFRAME_NO_TPIN(n) ((((size_t)(n))<<3)|6) + +// these are transitively pinning +#define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<3) +#define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<3)|1) +#define JL_GC_DECODE_NROOTS(n) (n >> 3) + +// these only pin the root object itself +#define JL_GC_ENCODE_PUSHARGS_NO_TPIN(n) (((size_t)(n))<<3|4) +#define JL_GC_ENCODE_PUSH_NO_TPIN(n) ((((size_t)(n))<<3)|5) +#endif #ifdef __clang_gcanalyzer__ diff --git a/src/julia_internal.h b/src/julia_internal.h index 5fe6cad0d096c..a54ac4849fdc9 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -560,12 +560,17 @@ static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); * safepoints will be caught by the GC analyzer. */ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); +JL_DLLEXPORT jl_value_t *jl_gc_alloc_nonmoving(jl_ptls_t ptls, size_t sz, void *ty); // On GCC, only inline when sz is constant #ifdef __GNUC__ # define jl_gc_alloc(ptls, sz, ty) \ (__builtin_constant_p(sz) ? \ jl_gc_alloc_(ptls, sz, ty) : \ (jl_gc_alloc)(ptls, sz, ty)) +# define jl_gc_alloc_nonmoving(ptls, sz, ty) \ + (__builtin_constant_p(sz) ? \ + jl_gc_alloc_nonmoving_(ptls, sz, ty) : \ + (jl_gc_alloc_nonmoving)(ptls, sz, ty)) #else # define jl_gc_alloc(ptls, sz, ty) jl_gc_alloc_(ptls, sz, ty) #endif diff --git a/src/julia_threads.h b/src/julia_threads.h index 061eb9266e7a7..5c3edce2056f2 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -230,6 +230,7 @@ typedef struct _jl_excstack_t jl_excstack_t; typedef struct _jl_handler_t jl_handler_t; typedef struct _jl_task_t { + JL_NON_MOVING // jl_mutex_t (as globals) references tasks JL_DATA_TYPE jl_value_t *next; // invasive linked list for scheduler jl_value_t *queue; // invasive linked list for scheduler @@ -274,6 +275,9 @@ typedef struct _jl_task_t { // uint48_t padding2_64; // saved gc stack top for context switches jl_gcframe_t *gcstack; + // GC stack of objects from gc preserve regions + // These must always be transitively pinned. Only used by MMTK. + jl_gcframe_t *gcpreserve_stack; size_t world_age; // quick lookup for current ptls jl_ptls_t ptls; // == jl_all_tls_states[tid] diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 76dcd944890ab..8c3eda208aa6f 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -42,6 +42,8 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) IRBuilder<> builder(target); StoreInst *inst = builder.CreateAlignedStore( + // FIXME: We should use JL_GC_ENCODE_PUSHARGS_NO_TPIN here. + // We need to make sure things are properly pinned before turning this into a non TPIN push. 
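+ // [Editorial worked example, derived from the MMTK_GC macros in julia.h.]
+ // For a frame with 3 roots:
+ //   JL_GC_ENCODE_PUSHARGS(3)         == (3 << 3)       == 24 == 0b11000 (tpin)
+ //   JL_GC_ENCODE_PUSHARGS_NO_TPIN(3) == ((3 << 3) | 4) == 28 == 0b11100 (no tpin)
+ //   JL_GC_DECODE_NROOTS(28)          == 28 >> 3        == 3
+ // i.e. bit 2 selects non-transitive pinning while the two lowest bits keep their
+ // existing frame-kind meaning.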
ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)), builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0, "frame.nroots"),// GEP of 0 becomes a noop and eats the name Align(sizeof(void*)));
diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index 7b2a4bb033203..aae2e99c6a383 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -361,6 +361,7 @@ struct LateLowerGCFrame: private JuliaPassContext { void PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore); void PlaceRootsAndUpdateCalls(ArrayRef<int> Colors, int PreAssignedColors, State &S, std::map<Value *, std::pair<int, int>>); void CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified); + void CleanupGCPreserve(Function &F, CallInst *CI, Value *callee, Type *T_size); bool CleanupIR(Function &F, State *S, bool *CFGModified); void NoteUseChain(State &S, BBState &BBS, User *TheUser); SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S); @@ -413,4 +414,12 @@ struct FinalLowerGC: private JuliaPassContext { void lowerSafepoint(CallInst *target, Function &F); }; +inline bool isSpecialPtr(Type *Ty) { + PointerType *PTy = dyn_cast<PointerType>(Ty); + if (!PTy) + return false; + unsigned AS = PTy->getAddressSpace(); + return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial; +} + #endif // LLVM_GC_PASSES_H
diff --git a/src/llvm-late-gc-lowering-mmtk.cpp b/src/llvm-late-gc-lowering-mmtk.cpp index 5539c8dbcf153..e3f83be1f9381 100644 --- a/src/llvm-late-gc-lowering-mmtk.cpp +++ b/src/llvm-late-gc-lowering-mmtk.cpp @@ -1,6 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "llvm-gc-interface-passes.h" +#include "mmtk.h" Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) { @@ -83,6 +84,31 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); auto v_as_ptr = builder.CreateIntToPtr(v_raw, smallAllocFunc->getReturnType()); + + // Post alloc + if (MMTK_NEEDS_VO_BIT) { + auto intptr_ty = Type::getInt64Ty(target->getContext()); + auto i8_ty = Type::getInt8Ty(F.getContext()); + intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_VO_BIT_BASE_ADDRESS); + auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address); + auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0)); + // intptr_t addr = (intptr_t) v; + auto addr = v_raw; + // uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6); + auto shr = builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 6)); + auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr); + // intptr_t shift = (addr >> 3) & 0b111; + auto shift = builder.CreateAnd(builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7)); + // uint8_t byte_val = *vo_meta_addr; + auto byte_val = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align()); + // uint8_t new_val = byte_val | (1 << shift); + auto shifted_val = builder.CreateShl(ConstantInt::get(intptr_ty, 1), shift); + auto shifted_val_i8 = builder.CreateTruncOrBitCast(shifted_val, i8_ty); + auto new_val = builder.CreateOr(byte_val, shifted_val_i8); + // (*vo_meta_addr) = new_val; + builder.CreateStore(new_val, metadata_ptr); + } + builder.CreateBr(next_instr->getParent());
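+ // [Editorial worked example.] The IR above mirrors mmtk_set_side_metadata in
+ // gc-mmtk.c: one VO bit covers 8 bytes of heap, one metadata byte covers 64 bytes.
+ // For an object at address 0x10000148:
+ //   metadata byte offset:   0x10000148 >> 6       == 0x400005
+ //   bit index in that byte: (0x10000148 >> 3) & 7 == 1
+ // so the emitted store ORs (1 << 1) into the byte at
+ // MMTK_SIDE_VO_BIT_BASE_ADDRESS + 0x400005.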
phiNode->addIncoming(new_call, slowpath); @@ -94,3 +120,49 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) } return target; } + +void LateLowerGCFrame::CleanupGCPreserve(Function &F, CallInst *CI, Value *callee, Type *T_size) { + if (callee == gc_preserve_begin_func) { + // Initialize an IR builder. + IRBuilder<> builder(CI); + + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + size_t nargs = 0; + State S2(F); + + std::vector<Value*> args; + for (Use &U : CI->args()) { + Value *V = U; + if (isa<Constant>(V)) + continue; + if (isa<PointerType>(V->getType())) { + if (isSpecialPtr(V->getType())) { + int Num = Number(S2, V); + if (Num >= 0) { + nargs++; + Value *Val = GetPtrForNumber(S2, Num, CI); + args.push_back(Val); + } + } + } else { + auto Nums = NumberAll(S2, V); + for (int Num : Nums) { + if (Num < 0) + continue; + Value *Val = GetPtrForNumber(S2, Num, CI); + args.push_back(Val); + nargs++; + } + } + } + args.insert(args.begin(), ConstantInt::get(T_size, nargs)); + + ArrayRef<Value*> args_llvm = ArrayRef<Value*>(args); + builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveBeginHook), args_llvm); + } else if (callee == gc_preserve_end_func) { + // Initialize an IR builder. + IRBuilder<> builder(CI); + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveEndHook), {}); + } +}
diff --git a/src/llvm-late-gc-lowering-stock.cpp b/src/llvm-late-gc-lowering-stock.cpp index 2a11487773396..838300043768d 100644 --- a/src/llvm-late-gc-lowering-stock.cpp +++ b/src/llvm-late-gc-lowering-stock.cpp @@ -7,3 +7,7 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) // Do nothing for the stock GC return target; } + +void LateLowerGCFrame::CleanupGCPreserve(Function &F, CallInst *CI, Value *callee, Type *T_size) { + // Do nothing for the stock GC +}
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 7d6fba65a79e7..b517c948087f7 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -13,14 +13,6 @@ static bool isTrackedValue(Value *V) { return PT && PT->getAddressSpace() == AddressSpace::Tracked; } -static bool isSpecialPtr(Type *Ty) { - PointerType *PTy = dyn_cast<PointerType>(Ty); - if (!PTy) - return false; - unsigned AS = PTy->getAddressSpace(); - return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial; -} - // return how many Special pointers are in T (count > 0), // and if there is anything else in T (all == false) CountTrackedPointers::CountTrackedPointers(Type *T, bool ignore_loaded) { @@ -2006,9 +1998,11 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { continue; } Value *callee = CI->getCalledOperand(); - if (callee && (callee == gc_flush_func || callee == gc_preserve_begin_func - || callee == gc_preserve_end_func)) { + if (callee && callee == gc_flush_func) { /* No replacement */ + } else if (callee && (callee == gc_preserve_begin_func + || callee == gc_preserve_end_func)) { + CleanupGCPreserve(F, CI, callee, T_size); } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI);
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index ca25251040fb2..2d54995161908 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -252,6 +252,8 @@ namespace jl_well_known { static const char *GC_SMALL_ALLOC_NAME = XSTR(jl_gc_small_alloc); static const char *GC_QUEUE_ROOT_NAME =
XSTR(jl_gc_queue_root); static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed); + static const char *GC_PRESERVE_BEGIN_HOOK_NAME = XSTR(jl_gc_preserve_begin_hook); + static const char *GC_PRESERVE_END_HOOK_NAME = XSTR(jl_gc_preserve_end_hook); using jl_intrinsics::addGCAllocAttributes; @@ -320,4 +322,50 @@ namespace jl_well_known { allocTypedFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None)); return addGCAllocAttributes(allocTypedFunc); }); + + const WellKnownFunctionDescription GCPreserveBeginHook( + GC_PRESERVE_BEGIN_HOOK_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(ctx), + { T_size }, + true), + Function::ExternalLinkage, + GC_PRESERVE_BEGIN_HOOK_NAME); + +#if JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif + return func; + }); + + const WellKnownFunctionDescription GCPreserveEndHook( + GC_PRESERVE_END_HOOK_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(ctx), + { }, + false), + Function::ExternalLinkage, + GC_PRESERVE_END_HOOK_NAME); +#if JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif + return func; + }); +} + +void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info) { if (debug_info >= 2 && !llvm::isa<llvm::Constant>(V)) { V->setName(Name); } } diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index d46f1f46634e6..b02c53b9797fa 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -155,6 +155,12 @@ namespace jl_well_known { // `jl_gc_alloc_typed`: allocates bytes.
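// [Illustrative note, not part of the patch] The LLVM FunctionTypes built above
// correspond to C prototypes along these lines (whether the runtime declares
// them exactly like this is an assumption; only the begin hook is variadic,
// taking the root count first and then the roots themselves):
//
//     void jl_gc_preserve_begin_hook(size_t nargs, ...);
//     void jl_gc_preserve_end_hook(void);
//
// Marking them inaccessiblemem_or_argmemonly tells LLVM the hooks only touch
// their arguments and memory the module cannot see, so loads and stores around
// the calls can still be optimized.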
extern const WellKnownFunctionDescription GCAllocTyped; + + // `jl_gc_preserve_begin_hook`: called at the beginning of gc preserve regions, if required + extern const WellKnownFunctionDescription GCPreserveBeginHook; + + // `jl_gc_preserve_end_hook`: called at the end of gc preserve regions, if required + extern const WellKnownFunctionDescription GCPreserveEndHook; } void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info); diff --git a/src/method.c b/src/method.c index 1b38a16649d8a..0b7923259dde2 100644 --- a/src/method.c +++ b/src/method.c @@ -555,7 +555,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) { jl_task_t *ct = jl_current_task; jl_method_instance_t *mi = - (jl_method_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_instance_t), + (jl_method_instance_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_method_instance_t), jl_method_instance_type); mi->def.value = NULL; mi->specTypes = NULL; diff --git a/src/module.c b/src/module.c index 1b6b37e49949e..9aa5c214d1ece 100644 --- a/src/module.c +++ b/src/module.c @@ -209,7 +209,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui { jl_task_t *ct = jl_current_task; const jl_uuid_t uuid_zero = {0, 0}; - jl_module_t *m = (jl_module_t*)jl_gc_alloc(ct->ptls, sizeof(jl_module_t), + jl_module_t *m = (jl_module_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_module_t), jl_module_type); jl_set_typetagof(m, jl_module_tag, 0); assert(jl_is_symbol(name)); diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index 9b7374d95af50..97b14c06699a8 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -320,6 +320,8 @@ jl_value_t *jl_get_cfunction_trampoline( tramp = trampoline_alloc(); ((void**)result)[0] = tramp; init_trampoline(tramp, nval); + OBJHASH_PIN((void*)fobj) + OBJHASH_PIN(result) ptrhash_put(cache, (void*)fobj, result); uv_mutex_unlock(&trampoline_lock); return result; diff --git a/src/signals-unix.c b/src/signals-unix.c index 1f4ad647a87af..fa3ad6a09b9eb 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -410,6 +410,8 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) return; } if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && jl_addr_is_safepoint((uintptr_t)info->si_addr) && !is_write_fault(context)) { + // TODO: We should do the same for other platforms + jl_gc_notify_thread_yield(ct->ptls, context); jl_set_gc_and_wait(ct); // Do not raise sigint on worker thread if (jl_atomic_load_relaxed(&ct->tid) != 0) diff --git a/src/staticdata.c b/src/staticdata.c index 4ea93fc58a9e0..b24d7f88f5da8 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -741,8 +741,11 @@ static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT { - if (*addr != newval) + if (*addr != newval) { + OBJHASH_PIN((void*)addr) + OBJHASH_PIN((void*)newval) ptrhash_put(&field_replace, (void*)addr, newval); + } } static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED @@ -2538,6 +2541,7 @@ static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED assert(serialization_queue.items[from_seroder_entry(idx)] == cache); cache = cache_rehash_set(cache, sz); // redirect all references to the old cache to relocate to the new cache object + OBJHASH_PIN((void*)cache) ptrhash_put(&serialization_order, cache, idx); serialization_queue.items[from_seroder_entry(idx)] = cache; return cache; @@ -3803,6 +3807,7 @@ static void 
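// [Illustrative note, not part of the patch] ptrhash tables hash and compare
// entries by raw address, so a moving collector would strand an entry whose key
// or value is relocated. OBJHASH_PIN forestalls that by pinning the object
// before its address is stored. A plausible shape for the macro (hypothetical;
// the real definition belongs to the GC interface, and mmtk_pin_object here
// names an assumed binding API):
//
//     #ifdef MMTK_GC
//     #define OBJHASH_PIN(obj) mmtk_pin_object((void*)(obj));
//     #else
//     #define OBJHASH_PIN(obj) /* no-op: the stock GC never moves objects */
//     #endif
//
// A definition along these lines would also explain why the call sites above
// carry no trailing semicolon.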
jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl assert(tag == 0); arraylist_push(&delay_list, obj); arraylist_push(&delay_list, pfld); + OBJHASH_PIN(obj) ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid *pfld = (uintptr_t)NULL; continue; @@ -3837,6 +3842,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl } static_assert(offsetof(jl_datatype_t, name) == 0, ""); newdt->name = dt->name; + OBJHASH_PIN(newdt) + OBJHASH_PIN(dt) ptrhash_put(&new_dt_objs, (void*)newdt, dt); } else { @@ -4198,8 +4205,10 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i char *sysimg; int success = !needs_permalloc; ios_seek(f, datastartpos); - if (needs_permalloc) + if (needs_permalloc) { sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + jl_gc_notify_image_alloc(sysimg, len); + } else sysimg = &f->buf[f->bpos]; if (needs_permalloc) @@ -4323,6 +4332,7 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname) ios_seek_end(&f); size_t len = ios_pos(&f); char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + jl_gc_notify_image_alloc(sysimg, len); ios_seek(&f, 0); if (ios_readall(&f, sysimg, len) != len) jl_errorf("Error reading system image file."); diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 1985357321a3a..dbd1f0f125dc3 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -82,7 +82,7 @@ static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT return 0; } -static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/; +jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/; // Mutex for newly_inferred jl_mutex_t newly_inferred_mutex; extern jl_mutex_t world_counter_lock; @@ -272,6 +272,7 @@ static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_ext_cis, uin assert(jl_is_code_instance(ci)); jl_method_t *m = jl_get_ci_mi(ci)->def.method; assert(jl_is_method(m)); + OBJHASH_PIN(m) ptrhash_put(&mset, (void*)m, (void*)m); } int nwithkey; diff --git a/src/subtype.c b/src/subtype.c index a0b7bff4006ce..5cb779fc0be8a 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -274,7 +274,7 @@ static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root) } else { roots = se->roots; - nroots = se->gcframe.nroots >> 2; + nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots); } } jl_varbinding_t *v = e->vars; @@ -367,7 +367,7 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO } else { roots = se->roots; - nroots = se->gcframe.nroots >> 2; + nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots); } } jl_varbinding_t *v = e->vars; @@ -4193,7 +4193,7 @@ static int merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se, int co else { saved = se->roots; merged = me->roots; - nroots = se->gcframe.nroots >> 2; + nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots); } assert(nroots == current_env_length(e) * 3); assert(nroots % 3 == 0); diff --git a/src/symbol.c b/src/symbol.c index 34b9073240cf3..1356494667a6c 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -10,6 +10,7 @@ #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" +#include "gc-interface.h" #ifdef __cplusplus extern "C" { diff --git a/src/task.c b/src/task.c index 068689d534a03..9f85ad09d5641 100644 --- a/src/task.c +++ b/src/task.c @@ -307,7 +307,7 @@ CFI_NORETURN #endif /* Rooted by the base module */ -static _Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL; +_Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL; void 
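// [Illustrative note, not part of the patch] A GC frame's nroots word packs
// flag bits into its low bits, so the raw count must be decoded rather than
// read verbatim. Under the historical stock encoding (two flag bits, hence the
// `>> 2` this patch replaces), the macro pair would look like:
//
//     #define JL_GC_ENCODE_NROOTS(n) ((uintptr_t)(n) << 2)
//     #define JL_GC_DECODE_NROOTS(n) ((uintptr_t)(n) >> 2)
//
// Routing every decode through JL_GC_DECODE_NROOTS means a GC that needs an
// extra flag bit (for example, a pinning bit) only has to widen the shift in
// one place instead of patching every caller.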
JL_NORETURN jl_finish_task(jl_task_t *ct) { @@ -1072,7 +1072,7 @@ void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSA JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize) { jl_task_t *ct = jl_current_task; - jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type); + jl_task_t *t = (jl_task_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(t, jl_task_tag, 0); JL_PROBE_RT_NEW_TASK(ct, t); t->ctx.copy_stack = 0; @@ -1105,6 +1105,12 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->start = start; t->result = jl_nothing; t->donenotify = completion_future; + // completion_future is a GenericCondition with a SpinLock. + // It is not yet clear why this pin is needed: without it, the object can be + // moved by the GC while some code still uses a stale reference to its old + // address. + // See https://github.com/mmtk/mmtk-julia/issues/179. + // TODO: Track down where the stale reference comes from. + OBJ_PIN(completion_future); jl_atomic_store_relaxed(&t->_isexception, 0); // Inherit scope from parent task t->scope = ct->scope; @@ -1536,7 +1542,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) bootstrap_task.value.ptls = ptls; if (jl_nothing == NULL) // make a placeholder jl_nothing = jl_gc_permobj(0, jl_nothing_type, 0); - jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); + jl_task_t *ct = (jl_task_t*)jl_gc_alloc_nonmoving(ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(ct, jl_task_tag, 0); memset(ct, 0, sizeof(jl_task_t)); void *stack = stack_lo; diff --git a/src/toplevel.c b/src/toplevel.c index 321ef8c79dac0..26e09394af5e7 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -139,6 +139,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_value_t *form = (jl_value_t*)newm; JL_GC_PUSH1(&form); JL_LOCK(&jl_modules_mutex); + OBJHASH_PIN(newm) ptrhash_put(&jl_current_modules, (void*)newm, (void*)((uintptr_t)HT_NOTFOUND + 1)); JL_UNLOCK(&jl_modules_mutex);
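// [Illustrative note, not part of the patch] The jl_gc_alloc_nonmoving calls
// introduced above target objects whose addresses leak into native state:
// tasks (their contexts and stacks are walked from C and from signal
// handlers), modules, and method instances. Assuming it shares jl_gc_alloc's
// signature, the contract is simply an address-stability guarantee:
//
//     jl_task_t *t = (jl_task_t*)jl_gc_alloc_nonmoving(
//         ct->ptls, sizeof(jl_task_t), jl_task_type);
//     // t's address is now fixed for its lifetime, so raw pointers such as
//     // &t->ctx can be cached by native code without the object moving
//     // underneath them.
//
// For the stock, non-moving collector the two entry points are expected to
// behave identically.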