diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 7aa29786fa64b..c520904cd1f03 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -541,7 +541,9 @@ Function *IRLinker_copyFunctionProto(Module *DstM, Function *SF) { auto *F = Function::Create(SF->getFunctionType(), SF->getLinkage(), SF->getAddressSpace(), SF->getName(), DstM); F->copyAttributesFrom(SF); +#if JL_LLVM_VERSION < 210000 F->IsNewDbgInfoFormat = SF->IsNewDbgInfoFormat; +#endif // Remove these copied constants since they point to the source module. F->setPersonalityFn(nullptr); @@ -796,234 +798,235 @@ void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvm return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); }); egal_set method_roots; - jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second)); - if (!llvmmod) - params.getContext().setDiscardValueNames(true); - params.params = &target_cgparams; - assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled - params.external_linkage = external_linkage; - params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); - bool safepoint_on_entry = params.safepoint_on_entry; - JL_GC_PUSH3(¶ms.temporary_roots, &method_roots.list, &method_roots.keyset); - jl_compiled_functions_t compiled_functions; - size_t i, l; - for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) { - // each item in this list is either a CodeInstance followed by a CodeInfo indicating something - // to compile, or a rettype followed by a sig describing a C-callable alias to create. - jl_value_t *item = jl_array_ptr_ref(codeinfos, i); - if (jl_is_code_instance(item)) { - // now add it to our compilation results - jl_code_instance_t *codeinst = (jl_code_instance_t*)item; - jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i); - assert(jl_is_code_info(src)); - if (compiled_functions.count(codeinst)) - continue; // skip any duplicates that accidentally made there way in here (or make this an error?) - if (jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) - params.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable - orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), - params.tsctx, clone.getModuleUnlocked()->getDataLayout(), - Triple(clone.getModuleUnlocked()->getTargetTriple())); - jl_llvm_functions_t decls; - if (!(params.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) - decls.functionObject = "jl_fptr_const_return"; - else - decls = jl_emit_codeinst(result_m, codeinst, src, params); - params.safepoint_on_entry = safepoint_on_entry; - record_method_roots(method_roots, jl_get_ci_mi(codeinst)); - if (result_m) - compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + return withCodegenParamsDo(ctxt, std::move(target_info.first), std::move(target_info.second), [&] (jl_codegen_params_t ¶ms) { + if (!llvmmod) + params.getContext().setDiscardValueNames(true); + params.params = &target_cgparams; + assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled + params.external_linkage = external_linkage; + params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + bool safepoint_on_entry = params.safepoint_on_entry; + JL_GC_PUSH3(¶ms.temporary_roots, &method_roots.list, &method_roots.keyset); + jl_compiled_functions_t compiled_functions; + size_t i, l; + for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) { + // each item in this list is either a CodeInstance followed by a CodeInfo indicating something + // to compile, or a rettype followed by a sig describing a C-callable alias to create. + jl_value_t *item = jl_array_ptr_ref(codeinfos, i); + if (jl_is_code_instance(item)) { + // now add it to our compilation results + jl_code_instance_t *codeinst = (jl_code_instance_t*)item; + jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i); + assert(jl_is_code_info(src)); + if (compiled_functions.count(codeinst)) + continue; // skip any duplicates that accidentally made there way in here (or make this an error?) + if (jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) + params.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable + orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), + params.tsctx, clone.getModuleUnlocked()->getDataLayout(), + Triple(clone.getModuleUnlocked()->getTargetTriple())); + jl_llvm_functions_t decls; + if (!(params.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) + decls.functionObject = "jl_fptr_const_return"; + else + decls = jl_emit_codeinst(result_m, codeinst, src, params); + params.safepoint_on_entry = safepoint_on_entry; + record_method_roots(method_roots, jl_get_ci_mi(codeinst)); + if (result_m) + compiled_functions[codeinst] = {std::move(result_m), std::move(decls)}; + } + else { + assert(jl_is_simplevector(item)); + jl_value_t *rt = jl_svecref(item, 0); + jl_value_t *sig = jl_svecref(item, 1); + jl_value_t *nameval = jl_svec_len(item) == 2 ? jl_nothing : jl_svecref(item, 2); + assert(jl_is_type(rt) && jl_is_type(sig)); + jl_generate_ccallable(clone.getModuleUnlocked(), nameval, rt, sig, params); + } } - else { - assert(jl_is_simplevector(item)); - jl_value_t *rt = jl_svecref(item, 0); - jl_value_t *sig = jl_svecref(item, 1); - jl_value_t *nameval = jl_svec_len(item) == 2 ? jl_nothing : jl_svecref(item, 2); - assert(jl_is_type(rt) && jl_is_type(sig)); - jl_generate_ccallable(clone.getModuleUnlocked(), nameval, rt, sig, params); - } - } - // finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them - resolve_workqueue(params, method_roots, compiled_functions); - // including generating cfunction thunks - generate_cfunc_thunks(params, compiled_functions); - aot_optimize_roots(params, method_roots, compiled_functions); - params.temporary_roots = nullptr; - params.temporary_roots_set.clear(); - JL_GC_POP(); - - // process the globals array, before jl_merge_module destroys them - SmallVector gvars(params.global_targets.size()); - data->jl_value_to_llvm.resize(params.global_targets.size()); - StringSet<> gvars_names; - DenseSet gvars_set; - - size_t idx = 0; - for (auto &global : params.global_targets) { - gvars[idx] = global.second->getName().str(); - assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!"); - assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!"); - data->jl_value_to_llvm[idx] = global.first; - idx++; - } - CreateNativeMethods += compiled_functions.size(); - - size_t offset = gvars.size(); - data->jl_external_to_llvm.resize(params.external_fns.size()); - - for (auto &extern_fn : params.external_fns) { - jl_code_instance_t *this_code = std::get<0>(extern_fn.first); - bool specsig = std::get<1>(extern_fn.first); - assert(specsig && "Error external_fns doesn't handle non-specsig yet"); - (void) specsig; - GlobalVariable *F = extern_fn.second; - size_t idx = gvars.size() - offset; - assert(idx >= 0); - assert(idx < data->jl_external_to_llvm.size()); - data->jl_external_to_llvm[idx] = this_code; - assert(gvars_set.insert(F).second && "Duplicate gvar in params!"); - assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!"); - gvars.push_back(std::string(F->getName())); - } - - // clones the contents of the module `m` to the shadow_output collector - // while examining and recording what kind of function pointer we have - { - Linker L(*clone.getModuleUnlocked()); - for (auto &def : compiled_functions) { - jl_code_instance_t *this_code = def.first; - JL_GC_PROMISE_ROOTED(this_code); - jl_llvm_functions_t &decls = def.second.decls; - StringRef func = decls.functionObject; - StringRef cfunc = decls.specFunctionObject; - orc::ThreadSafeModule &M = def.second.TSM; - if (external_linkage) { - uint8_t specsigflags; - jl_callptr_t invoke; - void *fptr; - jl_read_codeinst_invoke(this_code, &specsigflags, &invoke, &fptr, 0); - if (invoke != NULL && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) { - // this codeinst is already available externally: keep it only if canPartition demands it for local use - // TODO: for performance, avoid generating the src code when we know it would reach here anyways? - if (M.withModuleDo([&](Module &M) { return !canPartition(*cast(M.getNamedValue(cfunc))); })) { - jl_merge_module(L, std::move(M)); + // finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them + resolve_workqueue(params, method_roots, compiled_functions); + // including generating cfunction thunks + generate_cfunc_thunks(params, compiled_functions); + aot_optimize_roots(params, method_roots, compiled_functions); + params.temporary_roots = nullptr; + params.temporary_roots_set.clear(); + JL_GC_POP(); + + // process the globals array, before jl_merge_module destroys them + SmallVector gvars(params.global_targets.size()); + data->jl_value_to_llvm.resize(params.global_targets.size()); + StringSet<> gvars_names; + DenseSet gvars_set; + + size_t idx = 0; + for (auto &global : params.global_targets) { + gvars[idx] = global.second->getName().str(); + assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!"); + assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!"); + data->jl_value_to_llvm[idx] = global.first; + idx++; + } + CreateNativeMethods += compiled_functions.size(); + + size_t offset = gvars.size(); + data->jl_external_to_llvm.resize(params.external_fns.size()); + + for (auto &extern_fn : params.external_fns) { + jl_code_instance_t *this_code = std::get<0>(extern_fn.first); + bool specsig = std::get<1>(extern_fn.first); + assert(specsig && "Error external_fns doesn't handle non-specsig yet"); + (void) specsig; + GlobalVariable *F = extern_fn.second; + size_t idx = gvars.size() - offset; + assert(idx >= 0); + assert(idx < data->jl_external_to_llvm.size()); + data->jl_external_to_llvm[idx] = this_code; + assert(gvars_set.insert(F).second && "Duplicate gvar in params!"); + assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!"); + gvars.push_back(std::string(F->getName())); + } + + // clones the contents of the module `m` to the shadow_output collector + // while examining and recording what kind of function pointer we have + { + Linker L(*clone.getModuleUnlocked()); + for (auto &def : compiled_functions) { + jl_code_instance_t *this_code = def.first; + JL_GC_PROMISE_ROOTED(this_code); + jl_llvm_functions_t &decls = def.second.decls; + StringRef func = decls.functionObject; + StringRef cfunc = decls.specFunctionObject; + orc::ThreadSafeModule &M = def.second.TSM; + if (external_linkage) { + uint8_t specsigflags; + jl_callptr_t invoke; + void *fptr; + jl_read_codeinst_invoke(this_code, &specsigflags, &invoke, &fptr, 0); + if (invoke != NULL && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) { + // this codeinst is already available externally: keep it only if canPartition demands it for local use + // TODO: for performance, avoid generating the src code when we know it would reach here anyways? + if (M.withModuleDo([&](Module &M) { return !canPartition(*cast(M.getNamedValue(cfunc))); })) { + jl_merge_module(L, std::move(M)); + } + continue; } - continue; } + jl_merge_module(L, std::move(M)); + uint32_t func_id = 0; + uint32_t cfunc_id = 0; + if (func == "jl_fptr_args") { + func_id = -1; + } + else if (func == "jl_fptr_sparam") { + func_id = -2; + } + else if (func == "jl_f_opaque_closure_call") { + func_id = -4; + } + else if (func == "jl_fptr_const_return") { + func_id = -5; + } + else { + //Safe b/c context is locked by params + data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); + func_id = data->jl_sysimg_fvars.size(); + } + if (!cfunc.empty()) { + //Safe b/c context is locked by params + data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(cfunc))); + cfunc_id = data->jl_sysimg_fvars.size(); + } + data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id); } - jl_merge_module(L, std::move(M)); - uint32_t func_id = 0; - uint32_t cfunc_id = 0; - if (func == "jl_fptr_args") { - func_id = -1; - } - else if (func == "jl_fptr_sparam") { - func_id = -2; - } - else if (func == "jl_f_opaque_closure_call") { - func_id = -4; - } - else if (func == "jl_fptr_const_return") { - func_id = -5; - } - else { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(func))); - func_id = data->jl_sysimg_fvars.size(); - } - if (!cfunc.empty()) { - //Safe b/c context is locked by params - data->jl_sysimg_fvars.push_back(cast(clone.getModuleUnlocked()->getNamedValue(cfunc))); - cfunc_id = data->jl_sysimg_fvars.size(); + bool Changed = true; + while (Changed) { + Changed = false; + // make sure everything referenced got included though, since some functions aren't + // correctly implemented by staticdata for external use, and so codegen won't emit + // an external reference but expects a private copy here instead + for (auto &def : compiled_functions) { + orc::ThreadSafeModule &M = def.second.TSM; + if (!M) + continue; + jl_llvm_functions_t &decls = def.second.decls; + StringRef func = decls.functionObject; + StringRef cfunc = decls.specFunctionObject; + if (func != "jl_fptr_args" && + func != "jl_fptr_sparam" && + func != "jl_f_opaque_closure_call" && + clone.getModuleUnlocked()->getNamedValue(func)) { + jl_merge_module(L, std::move(M)); + Changed = true; + continue; + } + if (!cfunc.empty() && clone.getModuleUnlocked()->getNamedValue(cfunc)) { + Changed = true; + jl_merge_module(L, std::move(M)); + } + } } - data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id); - } - bool Changed = true; - while (Changed) { - Changed = false; - // make sure everything referenced got included though, since some functions aren't - // correctly implemented by staticdata for external use, and so codegen won't emit - // an external reference but expects a private copy here instead +#ifndef NDEBUG + // make sure we didn't forget anything that we promised to include in here for (auto &def : compiled_functions) { - orc::ThreadSafeModule &M = def.second.TSM; - if (!M) - continue; jl_llvm_functions_t &decls = def.second.decls; StringRef func = decls.functionObject; StringRef cfunc = decls.specFunctionObject; if (func != "jl_fptr_args" && func != "jl_fptr_sparam" && - func != "jl_f_opaque_closure_call" && - clone.getModuleUnlocked()->getNamedValue(func)) { - jl_merge_module(L, std::move(M)); - Changed = true; - continue; + func != "jl_f_opaque_closure_call") { + GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(func); + assert(!F || !F->isDeclaration()); } - if (!cfunc.empty() && clone.getModuleUnlocked()->getNamedValue(cfunc)) { - Changed = true; - jl_merge_module(L, std::move(M)); + if (!cfunc.empty()) { + GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(cfunc); + assert(!F || !F->isDeclaration()); } } - } -#ifndef NDEBUG - // make sure we didn't forget anything that we promised to include in here - for (auto &def : compiled_functions) { - jl_llvm_functions_t &decls = def.second.decls; - StringRef func = decls.functionObject; - StringRef cfunc = decls.specFunctionObject; - if (func != "jl_fptr_args" && - func != "jl_fptr_sparam" && - func != "jl_f_opaque_closure_call") { - GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(func); - assert(!F || !F->isDeclaration()); - } - if (!cfunc.empty()) { - GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(cfunc); - assert(!F || !F->isDeclaration()); - } - } #endif - compiled_functions.clear(); - if (params._shared_module) { - bool error = L.linkInModule(std::move(params._shared_module)); - assert(!error && "Error linking in shared module"); - (void)error; - } - } - - // now get references to the globals in the merged module - // and set them to be internalized and initialized at startup - // filter out any gvars that got optimized away - idx = 0; - size_t newoffset = 0; - size_t newidx = 0; - for (auto &global : gvars) { - //Safe b/c context is locked by params - GlobalVariable *G = cast_or_null(clone.getModuleUnlocked()->getNamedValue(global)); - if (G != nullptr) { - assert(!G->hasInitializer()); - G->setInitializer(Constant::getNullValue(G->getValueType())); - G->setLinkage(GlobalValue::InternalLinkage); - G->setDSOLocal(true); - assert(newidx == data->jl_sysimg_gvars.size()); - if (idx < offset) { - data->jl_value_to_llvm[newidx] = data->jl_value_to_llvm[idx]; - newoffset = newidx + 1; + compiled_functions.clear(); + if (params._shared_module) { + bool error = L.linkInModule(std::move(params._shared_module)); + assert(!error && "Error linking in shared module"); + (void)error; } - else { - data->jl_external_to_llvm[newidx - newoffset] = data->jl_external_to_llvm[idx - offset]; + } + + // now get references to the globals in the merged module + // and set them to be internalized and initialized at startup + // filter out any gvars that got optimized away + idx = 0; + size_t newoffset = 0; + size_t newidx = 0; + for (auto &global : gvars) { + //Safe b/c context is locked by params + GlobalVariable *G = cast_or_null(clone.getModuleUnlocked()->getNamedValue(global)); + if (G != nullptr) { + assert(!G->hasInitializer()); + G->setInitializer(Constant::getNullValue(G->getValueType())); + G->setLinkage(GlobalValue::InternalLinkage); + G->setDSOLocal(true); + assert(newidx == data->jl_sysimg_gvars.size()); + if (idx < offset) { + data->jl_value_to_llvm[newidx] = data->jl_value_to_llvm[idx]; + newoffset = newidx + 1; + } + else { + data->jl_external_to_llvm[newidx - newoffset] = data->jl_external_to_llvm[idx - offset]; + } + data->jl_sysimg_gvars.push_back(G); + newidx++; } - data->jl_sysimg_gvars.push_back(G); - newidx++; + idx++; } - idx++; - } - data->jl_value_to_llvm.resize(newoffset); - data->jl_external_to_llvm.resize(newidx - newoffset); - gvars.clear(); - CreateNativeGlobals += idx; + data->jl_value_to_llvm.resize(newoffset); + data->jl_external_to_llvm.resize(newidx - newoffset); + gvars.clear(); + CreateNativeGlobals += idx; - data->M = std::move(clone); - return (void*)data; + data->M = std::move(clone); + return (void*)data; + }); } static object::Archive::Kind getDefaultForHost(Triple &triple) @@ -1533,7 +1536,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer AOTOutputs out; auto TM = std::unique_ptr( SourceTM.getTarget().createTargetMachine( +#if JL_LLVM_VERSION < 210000 SourceTM.getTargetTriple().str(), +#else + SourceTM.getTargetTriple(), +#endif SourceTM.getTargetCPU(), SourceTM.getTargetFeatureString(), SourceTM.Options, @@ -1561,7 +1568,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer auto PMTM = std::unique_ptr( SourceTM.getTarget().createTargetMachine( +#if JL_LLVM_VERSION < 210000 SourceTM.getTargetTriple().str(), +#else + SourceTM.getTargetTriple(), +#endif SourceTM.getTargetCPU(), SourceTM.getTargetFeatureString(), SourceTM.Options, @@ -1835,71 +1846,156 @@ static void construct_vars(Module &M, Partition &partition, StringRef suffix) { gidxs_var->setDSOLocal(true); } -extern "C" void lambda_trampoline(void* arg) { - std::function* func = static_cast*>(arg); - (*func)(); - delete func; +template +static inline void schedule_uv_thread(uv_thread_t *worker, CB &&cb) +{ + auto func = new CB(std::move(cb)); + // Use libuv thread to avoid issues with stack sizes + uv_thread_create(worker, [] (void *arg) { + auto func = static_cast(arg); + (*func)(); + delete func; + }, func); } // Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading, // as well as partitioning, serialization, and deserialization. -template -static SmallVector add_output(Module &M, TargetMachine &TM, StringRef name, unsigned threads, - bool unopt_out, bool opt_out, bool obj_out, bool asm_out, ModuleReleasedFunc module_released) { - SmallVector outputs(threads); - assert(threads); - assert(unopt_out || opt_out || obj_out || asm_out); - // Timers for timing purposes - TimerGroup timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str()); - SmallVector timers(threads); - for (unsigned i = 0; i < threads; ++i) { - auto idx = std::to_string(i); - timers[i].name = "shard_" + idx; - timers[i].desc = ("Timings for " + name + " module shard " + idx).str(); - timers[i].deserialize.init("deserialize_" + idx, "Deserialize module"); - timers[i].materialize.init("materialize_" + idx, "Materialize declarations"); - timers[i].construct.init("construct_" + idx, "Construct partitioned definitions"); - timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode"); - timers[i].optimize.init("optimize_" + idx, "Optimize shard"); - timers[i].opt.init("opt_" + idx, "Emit optimized bitcode"); - timers[i].obj.init("obj_" + idx, "Emit object file"); - timers[i].asm_.init("asm_" + idx, "Emit assembly file"); - } - Timer partition_timer("partition", "Partition module", timer_group); - Timer serialize_timer("serialize", "Serialize module", timer_group); - Timer output_timer("output", "Add outputs", timer_group); - bool report_timings = false; - if (auto env = getenv("JULIA_IMAGE_TIMINGS")) { - char *endptr; - unsigned long val = strtoul(env, &endptr, 10); - if (endptr != env && !*endptr && val <= 1) { - report_timings = val; - } else { - if (StringRef("true").compare_insensitive(env) == 0) - report_timings = true; - else if (StringRef("false").compare_insensitive(env) == 0) - report_timings = false; - else - errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n"; + +// This is more or less a manual co-routine version of add_output +// which allows exiting calling scope when the module is released. +struct OutputAdder { + OutputAdder(Module &M, TargetMachine &TM, StringRef name, unsigned threads, + bool unopt_out, bool opt_out, bool obj_out, bool asm_out) + : TM(TM), threads(threads), unopt_out(unopt_out), + opt_out(opt_out), obj_out(obj_out), asm_out(asm_out), + timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str()) + { + assert(threads); + assert(unopt_out || opt_out || obj_out || asm_out); + for (unsigned i = 0; i < threads; ++i) { + auto idx = std::to_string(i); + timers[i].name = "shard_" + idx; + timers[i].desc = ("Timings for " + name + " module shard " + idx).str(); + timers[i].deserialize.init("deserialize_" + idx, "Deserialize module"); + timers[i].materialize.init("materialize_" + idx, "Materialize declarations"); + timers[i].construct.init("construct_" + idx, "Construct partitioned definitions"); + timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode"); + timers[i].optimize.init("optimize_" + idx, "Optimize shard"); + timers[i].opt.init("opt_" + idx, "Emit optimized bitcode"); + timers[i].obj.init("obj_" + idx, "Emit object file"); + timers[i].asm_.init("asm_" + idx, "Emit assembly file"); + } + if (auto env = getenv("JULIA_IMAGE_TIMINGS")) { + char *endptr; + unsigned long val = strtoul(env, &endptr, 10); + if (endptr != env && !*endptr && val <= 1) { + report_timings = val; + } else { + if (StringRef("true").compare_insensitive(env) == 0) + report_timings = true; + else if (StringRef("false").compare_insensitive(env) == 0) + report_timings = false; + else + errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n"; + } + } + // Single-threaded case + if (threads == 1) { + output_timer.startTimer(); + { + JL_TIMING(NATIVE_AOT, NATIVE_Opt); + // convert gvars to the expected offset table format for shard 0 + if (M.getGlobalVariable("jl_gvars")) { + auto gvars = consume_gv(M, "jl_gvars", false); + Type *T_size = M.getDataLayout().getIntPtrType(M.getContext()); + emit_offset_table(M, T_size, gvars, "jl_gvar", "_0"); // module flag "julia.mv.suffix" + M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs_0"); + } + output0 = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out); + } + output_timer.stopTimer(); + return; + } + + partition_timer.startTimer(); + uint64_t counter = 0; + // Partitioning requires all globals to have names. + // We use a prefix to avoid name conflicts with user code. + for (auto &G : M.global_values()) { + if (!G.isDeclaration() && !G.hasName()) { + G.setName("jl_ext_" + Twine(counter++)); + } } + partitions = partitionModule(M, threads); + partition_timer.stopTimer(); + + serialize_timer.startTimer(); + serialized = serializeModule(M); + serialize_timer.stopTimer(); } - // Single-threaded case - if (threads == 1) { + + auto finish() + { + SmallVector outputs(threads); + if (threads == 1) { + outputs[0] = std::move(output0); + if (!report_timings) { + timer_group.clear(); + } else { + timer_group.print(dbgs(), true); + for (auto &t : timers) { + t.print(dbgs(), true); + } + } + return outputs; + } output_timer.startTimer(); + + // Start all of the worker threads { JL_TIMING(NATIVE_AOT, NATIVE_Opt); - // convert gvars to the expected offset table format for shard 0 - if (M.getGlobalVariable("jl_gvars")) { - auto gvars = consume_gv(M, "jl_gvars", false); - Type *T_size = M.getDataLayout().getIntPtrType(M.getContext()); - emit_offset_table(M, T_size, gvars, "jl_gvar", "_0"); // module flag "julia.mv.suffix" - M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs_0"); + std::vector workers(threads); + for (unsigned i = 0; i < threads; i++) { + schedule_uv_thread(&workers[i], [&, i]() { + LLVMContext ctx; + ctx.setDiscardValueNames(true); + // Lazily deserialize the entire module + timers[i].deserialize.startTimer(); + auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx); + // Make sure this also fails with only julia, but not LLVM assertions enabled, + // otherwise, the first error we hit is the LLVM module verification failure, + // which will look very confusing, because the module was partially deserialized. + bool deser_succeeded = (bool)EM; + auto M = cantFail(std::move(EM), "Error loading module"); + assert(deser_succeeded); (void)deser_succeeded; + timers[i].deserialize.stopTimer(); + + timers[i].materialize.startTimer(); + materializePreserved(*M, partitions[i]); + timers[i].materialize.stopTimer(); + + timers[i].construct.startTimer(); + std::string suffix = "_" + std::to_string(i); + construct_vars(*M, partitions[i], suffix); + M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), suffix)); + // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file + // or it may skip emitting debug info for that file. Here set it to ./julia#N + DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), "."); + if (M->getNamedMetadata("llvm.dbg.cu")) + for (auto CU: M->getNamedMetadata("llvm.dbg.cu")->operands()) + CU->replaceOperandWith(0, topfile); + timers[i].construct.stopTimer(); + + outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out); + }); } - outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out); + + // Wait for all of the worker threads to finish + for (unsigned i = 0; i < threads; i++) + uv_thread_join(&workers[i]); } + output_timer.stopTimer(); - // Don't need M anymore - module_released(M); if (!report_timings) { timer_group.clear(); @@ -1908,99 +2004,37 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri for (auto &t : timers) { t.print(dbgs(), true); } + dbgs() << "Partition weights: ["; + bool comma = false; + for (auto &p : partitions) { + if (comma) + dbgs() << ", "; + else + comma = true; + dbgs() << p.weight; + } + dbgs() << "]\n"; } return outputs; } - partition_timer.startTimer(); - uint64_t counter = 0; - // Partitioning requires all globals to have names. - // We use a prefix to avoid name conflicts with user code. - for (auto &G : M.global_values()) { - if (!G.isDeclaration() && !G.hasName()) { - G.setName("jl_ext_" + Twine(counter++)); - } - } - auto partitions = partitionModule(M, threads); - partition_timer.stopTimer(); - - serialize_timer.startTimer(); - auto serialized = serializeModule(M); - serialize_timer.stopTimer(); - - // Don't need M anymore, since we'll only read from serialized from now on - module_released(M); - - output_timer.startTimer(); - - // Start all of the worker threads - { - JL_TIMING(NATIVE_AOT, NATIVE_Opt); - std::vector workers(threads); - for (unsigned i = 0; i < threads; i++) { - std::function func = [&, i]() { - LLVMContext ctx; - ctx.setDiscardValueNames(true); - // Lazily deserialize the entire module - timers[i].deserialize.startTimer(); - auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx); - // Make sure this also fails with only julia, but not LLVM assertions enabled, - // otherwise, the first error we hit is the LLVM module verification failure, - // which will look very confusing, because the module was partially deserialized. - bool deser_succeeded = (bool)EM; - auto M = cantFail(std::move(EM), "Error loading module"); - assert(deser_succeeded); (void)deser_succeeded; - timers[i].deserialize.stopTimer(); - - timers[i].materialize.startTimer(); - materializePreserved(*M, partitions[i]); - timers[i].materialize.stopTimer(); - - timers[i].construct.startTimer(); - std::string suffix = "_" + std::to_string(i); - construct_vars(*M, partitions[i], suffix); - M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), suffix)); - // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file - // or it may skip emitting debug info for that file. Here set it to ./julia#N - DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), "."); - if (M->getNamedMetadata("llvm.dbg.cu")) - for (auto CU: M->getNamedMetadata("llvm.dbg.cu")->operands()) - CU->replaceOperandWith(0, topfile); - timers[i].construct.stopTimer(); - - outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out); - }; - auto arg = new std::function(func); - uv_thread_create(&workers[i], lambda_trampoline, arg); // Use libuv thread to avoid issues with stack sizes - } - - // Wait for all of the worker threads to finish - for (unsigned i = 0; i < threads; i++) - uv_thread_join(&workers[i]); - } - - output_timer.stopTimer(); - - if (!report_timings) { - timer_group.clear(); - } else { - timer_group.print(dbgs(), true); - for (auto &t : timers) { - t.print(dbgs(), true); - } - dbgs() << "Partition weights: ["; - bool comma = false; - for (auto &p : partitions) { - if (comma) - dbgs() << ", "; - else - comma = true; - dbgs() << p.weight; - } - dbgs() << "]\n"; - } - return outputs; -} + TargetMachine &TM; + unsigned threads; + bool unopt_out; + bool opt_out; + bool obj_out; + bool asm_out; + AOTOutputs output0; + // Timers for timing purposes + TimerGroup timer_group; + SmallVector timers{threads}; + Timer partition_timer{"partition", "Partition module", timer_group}; + Timer serialize_timer{"serialize", "Serialize module", timer_group}; + Timer output_timer{"output", "Add outputs", timer_group}; + bool report_timings{false}; + SmallVector partitions; + SmallVector serialized; +}; extern int jl_is_timing_passes; static unsigned compute_image_thread_count(const ModuleInfo &info) { @@ -2113,7 +2147,11 @@ void jl_dump_native_impl(void *native_code, } std::unique_ptr SourceTM( jl_ExecutionEngine->getTarget().createTargetMachine( +#if JL_LLVM_VERSION < 210000 TheTriple.getTriple(), +#else + TheTriple, +#endif jl_ExecutionEngine->getTargetCPU(), jl_ExecutionEngine->getTargetFeatureString(), jl_ExecutionEngine->getTargetOptions(), @@ -2134,8 +2172,8 @@ void jl_dump_native_impl(void *native_code, OverrideStackAlignment = M.getOverrideStackAlignment(); }); - auto compile = [&](Module &M, StringRef name, unsigned threads, auto module_released) { - return add_output(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, module_released); + auto start_compile = [&](Module &M, StringRef name, unsigned threads) { + return OutputAdder(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname); }; SmallVector sysimg_outputs; @@ -2146,7 +2184,11 @@ void jl_dump_native_impl(void *native_code, LLVMContext Context; Context.setDiscardValueNames(true); Module sysimgM("sysimg", Context); +#if JL_LLVM_VERSION < 210000 sysimgM.setTargetTriple(TheTriple.str()); +#else + sysimgM.setTargetTriple(TheTriple); +#endif sysimgM.setDataLayout(DL); sysimgM.setStackProtectorGuard(StackProtectorGuard); sysimgM.setOverrideStackAlignment(OverrideStackAlignment); @@ -2200,7 +2242,7 @@ void jl_dump_native_impl(void *native_code, // Note that we don't set z to null, this allows the check in WRITE_ARCHIVE // to function as expected // no need to free the module/context, destructor handles that - sysimg_outputs = compile(sysimgM, "sysimg", 1, [](Module &) {}); + sysimg_outputs = start_compile(sysimgM, "sysimg", 1).finish(); } const bool imaging_mode = true; @@ -2214,7 +2256,11 @@ void jl_dump_native_impl(void *native_code, data->M.withModuleDo([&](Module &dataM) { JL_TIMING(NATIVE_AOT, NATIVE_Setup); +#if JL_LLVM_VERSION < 210000 dataM.setTargetTriple(TheTriple.str()); +#else + dataM.setTargetTriple(TheTriple); +#endif dataM.setDataLayout(DL); dataM.setPICLevel(PICLevel::BigPIC); auto &Context = dataM.getContext(); @@ -2295,19 +2341,13 @@ void jl_dump_native_impl(void *native_code, }); { - // Don't use withModuleDo here since we delete the TSM midway through - auto TSCtx = data->M.getContext(); - auto lock = TSCtx.getLock(); - auto dataM = data->M.getModuleUnlocked(); - - data_outputs = compile(*dataM, "text", threads, [data, &lock, &TSCtx](Module &) { - // Delete data when add_output thinks it's done with it - // Saves memory for use when multithreading - auto lock2 = std::move(lock); - delete data; - // Drop last reference to shared LLVM::Context - auto TSCtx2 = std::move(TSCtx); + auto adder = data->M.withModuleDo([&] (auto &dataM) { + return start_compile(dataM, "text", threads); }); + // Delete data when add_output thinks it's done with it + // Saves memory for use when multithreading + delete data; + data_outputs = adder.finish(); } if (params->emit_metadata) { @@ -2315,7 +2355,11 @@ void jl_dump_native_impl(void *native_code, LLVMContext Context; Context.setDiscardValueNames(true); Module metadataM("metadata", Context); +#if JL_LLVM_VERSION < 210000 metadataM.setTargetTriple(TheTriple.str()); +#else + metadataM.setTargetTriple(TheTriple); +#endif metadataM.setDataLayout(DL); metadataM.setStackProtectorGuard(StackProtectorGuard); metadataM.setOverrideStackAlignment(OverrideStackAlignment); @@ -2406,7 +2450,7 @@ void jl_dump_native_impl(void *native_code, } // no need to free module/context, destructor handles that - metadata_outputs = compile(metadataM, "data", 1, [](Module &) {}); + metadata_outputs = start_compile(metadataM, "data", 1).finish(); } { @@ -2514,88 +2558,89 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_ uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - jl_codegen_params_t output(ctx, DL, TT); - output.params = ¶ms; - output.imaging_mode = jl_options.image_codegen; - output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); - JL_GC_PUSH1(&output.temporary_roots); - jl_llvm_functions_t decls = jl_emit_code(m, mi, src, mi->specTypes, src->rettype, output); - // while not required, also emit the cfunc thunks, based on the - // inferred ABIs of their targets in the current latest world, - // since otherwise it is challenging to see all relevant codes - jl_compiled_functions_t compiled_functions; - size_t latestworld = jl_atomic_load_acquire(&jl_world_counter); - for (cfunc_decl_t &cfunc : output.cfuncs) { - jl_value_t *sigt = cfunc.abi.sigt; - JL_GC_PROMISE_ROOTED(sigt); - jl_value_t *mi = jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0); - if (mi == jl_nothing) - continue; - jl_code_instance_t *codeinst = jl_type_infer((jl_method_instance_t*)mi, latestworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim); - if (codeinst == nullptr || compiled_functions.count(codeinst)) - continue; - orc::ThreadSafeModule decl_m = jl_create_ts_module("extern", ctx, DL, TT); - jl_llvm_functions_t decls; - if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) - decls.functionObject = "jl_fptr_const_return"; - else - decls = jl_emit_codedecls(decl_m, codeinst, output); - compiled_functions[codeinst] = {std::move(decl_m), std::move(decls)}; - } - generate_cfunc_thunks(output, compiled_functions); - emit_always_inline(m, output); - output.workqueue.clear(); - compiled_functions.clear(); - output.temporary_roots = nullptr; - JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it - - if (m) { - // if compilation succeeded, prepare to return the result - // Similar to jl_link_global from jitlayers.cpp, - // so that code_llvm shows similar codegen to the jit - for (auto &global : output.global_targets) { - if (jl_options.image_codegen) { - global.second->setLinkage(GlobalValue::ExternalLinkage); + withCodegenParamsDo(ctx, DL, TT, [&] (jl_codegen_params_t &output) { + output.params = ¶ms; + output.imaging_mode = jl_options.image_codegen; + output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH1(&output.temporary_roots); + jl_llvm_functions_t decls = jl_emit_code(m, mi, src, mi->specTypes, src->rettype, output); + // while not required, also emit the cfunc thunks, based on the + // inferred ABIs of their targets in the current latest world, + // since otherwise it is challenging to see all relevant codes + jl_compiled_functions_t compiled_functions; + size_t latestworld = jl_atomic_load_acquire(&jl_world_counter); + for (cfunc_decl_t &cfunc : output.cfuncs) { + jl_value_t *sigt = cfunc.abi.sigt; + JL_GC_PROMISE_ROOTED(sigt); + jl_value_t *mi = jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0); + if (mi == jl_nothing) + continue; + jl_code_instance_t *codeinst = jl_type_infer((jl_method_instance_t*)mi, latestworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim); + if (codeinst == nullptr || compiled_functions.count(codeinst)) + continue; + orc::ThreadSafeModule decl_m = jl_create_ts_module("extern", ctx, DL, TT); + jl_llvm_functions_t decls; + if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) + decls.functionObject = "jl_fptr_const_return"; + else + decls = jl_emit_codedecls(decl_m, codeinst, output); + compiled_functions[codeinst] = {std::move(decl_m), std::move(decls)}; + } + generate_cfunc_thunks(output, compiled_functions); + emit_always_inline(m, output); + output.workqueue.clear(); + compiled_functions.clear(); + output.temporary_roots = nullptr; + JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it + + if (m) { + // if compilation succeeded, prepare to return the result + // Similar to jl_link_global from jitlayers.cpp, + // so that code_llvm shows similar codegen to the jit + for (auto &global : output.global_targets) { + if (jl_options.image_codegen) { + global.second->setLinkage(GlobalValue::ExternalLinkage); + } + else { + auto p = literal_static_pointer_val(global.first, global.second->getValueType()); + Type *elty = PointerType::get(p->getContext(), 0); + // For pretty printing, when LLVM inlines the global initializer into its loads + auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent()); + global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType())); + global.second->setConstant(true); + global.second->setLinkage(GlobalValue::PrivateLinkage); + global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + global.second->setVisibility(GlobalValue::DefaultVisibility); + } } - else { - auto p = literal_static_pointer_val(global.first, global.second->getValueType()); - Type *elty = PointerType::get(p->getContext(), 0); - // For pretty printing, when LLVM inlines the global initializer into its loads - auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent()); - global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType())); - global.second->setConstant(true); - global.second->setLinkage(GlobalValue::PrivateLinkage); - global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - global.second->setVisibility(GlobalValue::DefaultVisibility); + if (!jl_options.image_codegen) { + optimizeDLSyms(*m.getModuleUnlocked()); } - } - if (!jl_options.image_codegen) { - optimizeDLSyms(*m.getModuleUnlocked()); - } - assert(!verifyLLVMIR(*m.getModuleUnlocked())); - if (optimize) { - auto opts = OptimizationOptions::defaults(); - opts.sanitize_memory = params.sanitize_memory; - opts.sanitize_thread = params.sanitize_thread; - opts.sanitize_address = params.sanitize_address; - NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level), opts}; - //Safe b/c context lock is held by output - PM.run(*m.getModuleUnlocked()); assert(!verifyLLVMIR(*m.getModuleUnlocked())); + if (optimize) { + auto opts = OptimizationOptions::defaults(); + opts.sanitize_memory = params.sanitize_memory; + opts.sanitize_thread = params.sanitize_thread; + opts.sanitize_address = params.sanitize_address; + NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level), opts}; + //Safe b/c context lock is held by output + PM.run(*m.getModuleUnlocked()); + assert(!verifyLLVMIR(*m.getModuleUnlocked())); + } + const std::string *fname; + if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") + getwrapper = false; + if (!getwrapper) + fname = &decls.specFunctionObject; + else + fname = &decls.functionObject; + F = cast(m.getModuleUnlocked()->getNamedValue(*fname)); } - const std::string *fname; - if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") - getwrapper = false; - if (!getwrapper) - fname = &decls.specFunctionObject; - else - fname = &decls.functionObject; - F = cast(m.getModuleUnlocked()->getNamedValue(*fname)); - } - if (measure_compile_time_enabled) { - auto end = jl_hrtime(); - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); - } + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } + }); } if (F) { dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m))); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 62f31e237f4b6..86e57f7208c69 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1009,8 +1009,13 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const // above problem won't be as serious. auto merged_ai = dst_ai.merge(src_ai); +#if JL_LLVM_VERSION < 210000 ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile, merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); +#else + ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile, + merged_ai.toAAMDNodes()); +#endif } static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, @@ -1023,8 +1028,13 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const ++EmittedMemcpys; auto merged_ai = dst_ai.merge(src_ai); +#if JL_LLVM_VERSION < 210000 ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile, merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); +#else + ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile, + merged_ai.toAAMDNodes()); +#endif } template diff --git a/src/codegen.cpp b/src/codegen.cpp index e145c73dc6c38..1d71b152e89f0 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -605,6 +605,24 @@ AttributeSet Attributes(LLVMContext &C, std::initializer_list(attrs)); } +static inline Attribute NoCaptureAttr(LLVMContext &C) +{ +#if JL_LLVM_VERSION < 210000 + return Attribute::get(C, Attribute::NoCapture); +#else + return Attribute::getWithCaptureInfo(C, CaptureInfo(CaptureComponents::None)); +#endif +} + +static inline void addNoCaptureAttr(AttrBuilder ¶m) +{ +#if JL_LLVM_VERSION < 210000 + param.addAttribute(Attribute::NoCapture); +#else + param.addCapturesAttr(CaptureInfo(CaptureComponents::None)); +#endif +} + static Type *get_pjlvalue(LLVMContext &C) { return JuliaType::get_pjlvalue_ty(C); } static FunctionType *get_func_sig(LLVMContext &C) { return JuliaType::get_jlfunc_ty(C); } @@ -617,7 +635,7 @@ static AttributeList get_func_attrs(LLVMContext &C) AttributeSet(), Attributes(C, {Attribute::NonNull}), {AttributeSet(), - Attributes(C, {Attribute::NoAlias, Attribute::ReadOnly, Attribute::NoCapture, Attribute::NoUndef})}); + Attributes(C, {Attribute::NoAlias, Attribute::ReadOnly, Attribute::NoUndef}, {NoCaptureAttr(C)})}); } static AttributeList get_attrs_noreturn(LLVMContext &C) @@ -996,7 +1014,7 @@ static const auto jllockvalue_func = new JuliaFunction<>{ [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), AttributeSet(), - {Attributes(C, {Attribute::NoCapture})}); }, + {Attributes(C, {}, {NoCaptureAttr(C)})}); }, }; static const auto jlunlockvalue_func = new JuliaFunction<>{ XSTR(jl_unlock_value), @@ -1005,7 +1023,7 @@ static const auto jlunlockvalue_func = new JuliaFunction<>{ [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), AttributeSet(), - {Attributes(C, {Attribute::NoCapture})}); }, + {Attributes(C, {}, {NoCaptureAttr(C)})}); }, }; static const auto jllockfield_func = new JuliaFunction<>{ XSTR(jl_lock_field), @@ -1014,7 +1032,7 @@ static const auto jllockfield_func = new JuliaFunction<>{ [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), AttributeSet(), - {Attributes(C, {Attribute::NoCapture})}); }, + {Attributes(C, {}, {NoCaptureAttr(C)})}); }, }; static const auto jlunlockfield_func = new JuliaFunction<>{ XSTR(jl_unlock_field), @@ -1023,7 +1041,7 @@ static const auto jlunlockfield_func = new JuliaFunction<>{ [](LLVMContext &C) { return AttributeList::get(C, AttributeSet(), AttributeSet(), - {Attributes(C, {Attribute::NoCapture})}); }, + {Attributes(C, {}, {NoCaptureAttr(C)})}); }, }; static const auto jlenter_func = new JuliaFunction<>{ XSTR(jl_enter_handler), @@ -1489,7 +1507,7 @@ static const auto gc_loaded_func = new JuliaFunction<>{ RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addAttribute(Attribute::NoUndef); return AttributeList::get(C, AttributeSet::get(C,FnAttrs), AttributeSet::get(C,RetAttrs), - { Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone, Attribute::NoCapture}), + { Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}, {NoCaptureAttr(C)}), Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}) }); }, }; @@ -1684,6 +1702,15 @@ struct jl_aliasinfo_t { // memory region non-aliasing. It should be deleted once the TBAA metadata // is improved to encode only memory layout and *not* memory regions. static jl_aliasinfo_t fromTBAA(jl_codectx_t &ctx, MDNode *tbaa); + + AAMDNodes toAAMDNodes() const + { +#if JL_LLVM_VERSION < 220000 + return AAMDNodes(tbaa, tbaa_struct, scope, noalias); +#else + return AAMDNodes(tbaa, tbaa_struct, scope, noalias, nullptr); +#endif + } }; // metadata tracking for a llvm Value* during codegen @@ -2681,7 +2708,11 @@ std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &conte m->addModuleFlag(llvm::Module::Warning, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); m->setDataLayout(DL); +#if JL_LLVM_VERSION < 210000 m->setTargetTriple(triple.str()); +#else + m->setTargetTriple(triple); +#endif if (triple.isOSWindows() && triple.getArch() == Triple::x86) { // tell Win32 to assume the stack is always 16-byte aligned, @@ -7759,7 +7790,7 @@ const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value crt = (jl_value_t*)jl_any_type; } bool toboxed; - Type *lcrt = _julia_struct_to_llvm(¶ms, *params.tsctx.getContext(), crt, &toboxed); + Type *lcrt = _julia_struct_to_llvm(¶ms, params.getContext(), crt, &toboxed); if (toboxed) lcrt = JuliaType::get_prjlvalue_ty(lcrt->getContext()); size_t nargs = jl_nparams(sigt)-1; @@ -7937,7 +7968,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t ¶ms, Module AttrBuilder param(M->getContext()); param.addStructRetAttr(srt); param.addAttribute(Attribute::NoAlias); - param.addAttribute(Attribute::NoCapture); + addNoCaptureAttr(param); param.addAttribute(Attribute::NoUndef); attrs.push_back(AttributeSet::get(M->getContext(), param)); assert(fsig.size() == 1); @@ -7945,7 +7976,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t ¶ms, Module if (props.cc == jl_returninfo_t::Union) { AttrBuilder param(M->getContext()); param.addAttribute(Attribute::NoAlias); - param.addAttribute(Attribute::NoCapture); + addNoCaptureAttr(param); param.addAttribute(Attribute::NoUndef); attrs.push_back(AttributeSet::get(M->getContext(), param)); assert(fsig.size() == 1); @@ -7954,7 +7985,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t ¶ms, Module if (props.return_roots) { AttrBuilder param(M->getContext()); param.addAttribute(Attribute::NoAlias); - param.addAttribute(Attribute::NoCapture); + addNoCaptureAttr(param); param.addAttribute(Attribute::NoUndef); attrs.push_back(AttributeSet::get(M->getContext(), param)); fsig.push_back(getPointerTy(M->getContext())); @@ -7988,7 +8019,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t ¶ms, Module AttrBuilder param(M->getContext()); Type *ty = et; if (et == nullptr || et->isAggregateType()) { // aggregate types are passed by pointer - param.addAttribute(Attribute::NoCapture); + addNoCaptureAttr(param); param.addAttribute(Attribute::ReadOnly); ty = PointerType::get(M->getContext(), AddressSpace::Derived); } @@ -8119,7 +8150,7 @@ static jl_llvm_functions_t size_t min_world = src->min_world; size_t max_world = src->max_world; jl_llvm_functions_t declarations; - jl_codectx_t ctx(*params.tsctx.getContext(), params, min_world, max_world); + jl_codectx_t ctx(params.getContext(), params, min_world, max_world); jl_datatype_t *vatyp = NULL; JL_GC_PUSH2(&ctx.code, &vatyp); ctx.code = src->code; @@ -9884,7 +9915,9 @@ void linkFunctionBody(Function &Dst, Function &Src) Dst.setPersonalityFn(Src.getPersonalityFn()); if (Src.hasPersonalityFn()) Dst.setPersonalityFn(Src.getPersonalityFn()); +#if JL_LLVM_VERSION < 210000 assert(Src.IsNewDbgInfoFormat == Dst.IsNewDbgInfoFormat); +#endif // Copy over the metadata attachments without remapping. Dst.copyMetadata(&Src, 0); diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 752dc505092fa..98044f763745c 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -505,8 +505,13 @@ static int lookup_pointer( else { int havelock = jl_lock_profile_wr(); assert(havelock); (void)havelock; - info = context->getLineInfoForAddress(makeAddress(Section, pointer + slide), infoSpec); + auto lineinfo = context->getLineInfoForAddress(makeAddress(Section, pointer + slide), infoSpec); jl_unlock_profile_wr(); +#if JL_LLVM_VERSION < 210000 + info = std::move(lineinfo); +#else + info = std::move(lineinfo.value()); +#endif } jl_frame_t *frame = &(*frames)[i]; diff --git a/src/disasm.cpp b/src/disasm.cpp index a80f79218f509..2daa8bc2bcbb0 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -506,37 +506,38 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada auto TSM = std::unique_ptr(unwrap(dump->TSM)); //If TSM is not passed in, then the context MUST be locked externally. //RAII will release the lock - std::optional lock; - if (TSM) { - lock.emplace(TSM->getContext().getLock()); - } - Function *llvmf = cast(unwrap(dump->F)); - if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent())) - jl_error("jl_dump_function_ir: Expected Function* in a temporary Module"); - - LineNumberAnnotatedWriter AAW{"; ", false, debuginfo}; - if (!llvmf->getParent()) { - // print the function declaration as-is - llvmf->print(stream, &AAW); - delete llvmf; - } - else { - assert(TSM && TSM->getModuleUnlocked() == llvmf->getParent() && "Passed module was not the same as function parent!"); - auto m = TSM->getModuleUnlocked(); - if (strip_ir_metadata) { - std::string llvmfn(llvmf->getName()); - jl_strip_llvm_addrspaces(m); - jl_strip_llvm_debug(m, true, &AAW); - // rewriting the function type creates a new function, so look it up again - llvmf = m->getFunction(llvmfn); - } - if (dump_module) { - m->print(stream, &AAW); + orc::ThreadSafeContext TSCtx; + if (TSM) + TSCtx = TSM->getContext(); + withContextDo(TSCtx, [&] (LLVMContext*) { + Function *llvmf = cast(unwrap(dump->F)); + if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent())) + jl_error("jl_dump_function_ir: Expected Function* in a temporary Module"); + + LineNumberAnnotatedWriter AAW{"; ", false, debuginfo}; + if (!llvmf->getParent()) { + // print the function declaration as-is + llvmf->print(stream, &AAW); + delete llvmf; } else { - llvmf->print(stream, &AAW); + assert(TSM && TSM->getModuleUnlocked() == llvmf->getParent() && "Passed module was not the same as function parent!"); + auto m = TSM->getModuleUnlocked(); + if (strip_ir_metadata) { + std::string llvmfn(llvmf->getName()); + jl_strip_llvm_addrspaces(m); + jl_strip_llvm_debug(m, true, &AAW); + // rewriting the function type creates a new function, so look it up again + llvmf = m->getFunction(llvmfn); + } + if (dump_module) { + m->print(stream, &AAW); + } + else { + llvmf->print(stream, &AAW); + } } - } + }); } return jl_pchar_to_string(stream.str().data(), stream.str().size()); @@ -924,7 +925,11 @@ static void jl_dump_asm_internal( // LLVM will destroy the formatted stream, and we keep the raw stream. std::unique_ptr ustream(new formatted_raw_ostream(rstream)); std::unique_ptr Streamer( -#if JL_LLVM_VERSION >= 190000 +#if JL_LLVM_VERSION >= 210000 + TheTarget->createAsmStreamer(Ctx, std::move(ustream), + + std::move(IP), std::move(CE), std::move(MAB)) +#elif JL_LLVM_VERSION >= 190000 TheTarget->createAsmStreamer(Ctx, std::move(ustream), IP.release(), std::move(CE), std::move(MAB)) @@ -1268,8 +1273,8 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const OutputAsmDialect = 0; if (!strcmp(asm_variant, "intel")) OutputAsmDialect = 1; - MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter( - jl_ExecutionEngine->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI); + std::unique_ptr InstPrinter(TM->getTarget().createMCInstPrinter( + jl_ExecutionEngine->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI)); std::unique_ptr MAB(TM->getTarget().createMCAsmBackend( STI, MRI, Options)); std::unique_ptr MCE; @@ -1278,8 +1283,10 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const } auto FOut = std::make_unique(asmfile); std::unique_ptr S(TM->getTarget().createAsmStreamer( -#if JL_LLVM_VERSION >= 190000 - *Context, std::move(FOut), InstPrinter, std::move(MCE), std::move(MAB) +#if JL_LLVM_VERSION >= 210000 + *Context, std::move(FOut), std::move(InstPrinter), std::move(MCE), std::move(MAB) +#elif JL_LLVM_VERSION >= 190000 + *Context, std::move(FOut), InstPrinter.release(), std::move(MCE), std::move(MAB) #else *Context, std::move(FOut), true, true, InstPrinter, std::move(MCE), std::move(MAB), false diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 0773d1a6c16a1..af10521902790 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -14,6 +14,9 @@ #include #include #include +#if JL_LLVM_VERSION >= 210000 +# include +#endif #include #if JL_LLVM_VERSION >= 200000 #include @@ -299,8 +302,7 @@ void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi, orc::ThreadSafeModule result_m; std::string gf_thunk_name; - { - jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context + withCodegenParamsDo(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple(), [&] (jl_codegen_params_t ¶ms) { params.getContext().setDiscardValueNames(true); params.cache = true; params.imaging_mode = 0; @@ -320,7 +322,7 @@ void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi, SmallVector sharedmodules; finish_params(M, params, sharedmodules); assert(sharedmodules.empty()); - } + }); int8_t gc_state = jl_gc_safe_enter(ct->ptls); jl_ExecutionEngine->addModule(std::move(result_m)); uintptr_t Addr = jl_ExecutionEngine->getFunctionAddress(gf_thunk_name); @@ -599,11 +601,12 @@ static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE assert(waiting == std::get<1>(it->second)); std::get<1>(it->second) = 0; auto ¶ms = std::get<0>(it->second); - params.tsctx_lock = params.tsctx.getLock(); - waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint - assert(!waiting); (void)waiting; - Module *M = emittedmodules[codeinst].getModuleUnlocked(); - finish_params(M, params, sharedmodules); + params.withContextDo([&] (LLVMContext*) JL_NOTSAFEPOINT { + waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint + assert(!waiting); (void)waiting; + Module *M = emittedmodules[codeinst].getModuleUnlocked(); + finish_params(M, params, sharedmodules); + }); incompletemodules.erase(it); } // and then indicate this should be compiled now @@ -630,13 +633,14 @@ static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOT assert(it != incompletemodules.end()); if (--std::get<1>(it->second) == 0) { auto ¶ms = std::get<0>(it->second); - params.tsctx_lock = params.tsctx.getLock(); - assert(callee == it->first); - orc::ThreadSafeModule &M = emittedmodules[callee]; - emit_always_inline(M, params); // may safepoint - int waiting = jl_analyze_workqueue(callee, params); // may safepoint - assert(!waiting); (void)waiting; - finish_params(M.getModuleUnlocked(), params, sharedmodules); + params.withContextDo([&] (LLVMContext*) { + assert(callee == it->first); + orc::ThreadSafeModule &M = emittedmodules[callee]; + emit_always_inline(M, params); // may safepoint + int waiting = jl_analyze_workqueue(callee, params); // may safepoint + assert(!waiting); (void)waiting; + finish_params(M.getModuleUnlocked(), params, sharedmodules); + }); incompletemodules.erase(it); } } @@ -656,10 +660,9 @@ static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) if (!sharedmodules.empty()) { auto TSM = sharedmodules.pop_back_val(); lock.native.unlock(); - { - auto Lock = TSM.getContext().getLock(); + withContextDo(TSM.getContext(), [&] (LLVMContext*) { jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint - } + }); jl_ExecutionEngine->addModule(std::move(TSM)); lock.native.lock(); } @@ -675,10 +678,9 @@ static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) emittedmodules.erase(TSMref); lock.native.unlock(); uint64_t start_time = jl_hrtime(); - { - auto Lock = TSM.getContext().getLock(); + withContextDo(TSM.getContext(), [&] (LLVMContext*) { jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint - } + }); jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint // If logging of the compilation stream is enabled, // then dump the method-instance specialization type to the stream @@ -809,26 +811,31 @@ void jl_emit_codeinst_to_jit_impl( } JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); // emit the code in LLVM IR form to the new context - jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context - params.getContext().setDiscardValueNames(true); - params.cache = true; - params.imaging_mode = 0; - orc::ThreadSafeModule result_m = - jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), params.tsctx, params.DL, params.TargetTriple); - params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); - JL_GC_PUSH1(¶ms.temporary_roots); - jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints - if (!result_m) { + orc::ThreadSafeModule result_m; + jl_llvm_functions_t decls; + jl_codegen_params_t params(std::make_unique(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple(), std::defer_lock); + auto exit = params.withContextDo([&] (LLVMContext *ctx) { + ctx->setDiscardValueNames(true); + params.cache = true; + params.imaging_mode = 0; + result_m = + jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), params.tsctx, params.DL, params.TargetTriple); + params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); + JL_GC_PUSH1(¶ms.temporary_roots); + decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints + if (!result_m) { + JL_GC_POP(); + return true; + } + jl_optimize_roots(params, jl_get_ci_mi(codeinst), *result_m.getModuleUnlocked()); // contains safepoints + params.temporary_roots = nullptr; + params.temporary_roots_set.clear(); JL_GC_POP(); + return false; + }); + if (exit) return; - } - jl_optimize_roots(params, jl_get_ci_mi(codeinst), *result_m.getModuleUnlocked()); // contains safepoints - params.temporary_roots = nullptr; - params.temporary_roots_set.clear(); - JL_GC_POP(); - { // drop lock before acquiring engine_lock - auto release = std::move(params.tsctx_lock); - } + // drop codegen params lock before acquiring engine_lock jl_unique_gcsafe_lock lock(engine_lock); if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst)) return; // destroy everything @@ -852,16 +859,17 @@ void jl_emit_codeinst_to_jit_impl( jl_atomic_cmpswap_relaxed(&codeinst->invoke, &expected, jl_fptr_wait_for_compiled_addr); invokenames[codeinst] = std::move(decls); complete_emit(codeinst); - params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock - emit_always_inline(result_m, params); - int waiting = jl_analyze_workqueue(codeinst, params); - if (waiting) { - auto release = std::move(params.tsctx_lock); // unlock again before moving from it - incompletemodules.try_emplace(codeinst, std::move(params), waiting); - } - else { - finish_params(result_m.getModuleUnlocked(), params, sharedmodules); - } + params.withContextDo([&] (LLVMContext *ctx) { + // re-acquire lock + emit_always_inline(result_m, params); + int waiting = jl_analyze_workqueue(codeinst, params); + if (waiting) { + incompletemodules.try_emplace(codeinst, std::move(params), waiting); + } + else { + finish_params(result_m.getModuleUnlocked(), params, sharedmodules); + } + }); emittedmodules[codeinst] = std::move(result_m); } @@ -1223,6 +1231,8 @@ std::unique_ptr createJITLinkMemoryManager() JL_N #endif } +#if defined(JL_USE_JITLINK) && defined(LLVM_SHLIB) +# if JL_LLVM_VERSION < 210000 class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { public: Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) override { @@ -1235,6 +1245,72 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { return Error::success(); } }; +# else +class JLEHFrameRegistrationPlugin final : public LinkGraphLinkingLayer::Plugin { + static Error registerEHFrameWrapper(orc::ExecutorAddrRange EHFrame) { + register_eh_frames(EHFrame.Start.toPtr(), static_cast(EHFrame.size())); + return Error::success(); + } + + static Error deregisterEHFrameWrapper(orc::ExecutorAddrRange EHFrame) { + deregister_eh_frames(EHFrame.Start.toPtr(), static_cast(EHFrame.size())); + return Error::success(); + } + + static orc::shared::CWrapperFunctionResult + registerEHFrameSectionAllocAction(const char *ArgData, size_t ArgSize) { + using namespace llvm::orc::shared; + return WrapperFunction::handle( + ArgData, ArgSize, registerEHFrameWrapper) + .release(); + } + + static orc::shared::CWrapperFunctionResult + deregisterEHFrameSectionAllocAction(const char *ArgData, size_t ArgSize) { + using namespace llvm::orc::shared; + return WrapperFunction::handle( + ArgData, ArgSize, deregisterEHFrameWrapper) + .release(); + } + + static Error postFixup(jitlink::LinkGraph &G) + { + using namespace llvm::orc::shared; + auto registerFrame = ExecutorAddr::fromPtr(registerEHFrameSectionAllocAction); + auto deregisterFrame = ExecutorAddr::fromPtr(deregisterEHFrameSectionAllocAction); + if (auto *EHFrame = jitlink::getEHFrameSection(G)) { + auto R = jitlink::SectionRange(*EHFrame).getRange(); + G.allocActions().push_back( + {cantFail( + WrapperFunctionCall::Create>( + registerFrame, R)), + cantFail( + WrapperFunctionCall::Create>( + deregisterFrame, R))}); + } + return Error::success(); + } + +public: + JLEHFrameRegistrationPlugin() {} + + void modifyPassConfig(MaterializationResponsibility&, + jitlink::LinkGraph&, + jitlink::PassConfiguration &PassConfig) override + { + PassConfig.PostFixupPasses.push_back(postFixup); + } + Error notifyFailed(MaterializationResponsibility&) override { + return Error::success(); + } + Error notifyRemovingResources(JITDylib&, ResourceKey) override { + return Error::success(); + } + void notifyTransferringResources(JITDylib&, ResourceKey, + ResourceKey) override {} +}; +# endif +#endif RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT; @@ -1401,7 +1477,12 @@ namespace { } auto optlevel = CodeGenOptLevelFor(jl_options.opt_level); auto TM = TheTarget->createTargetMachine( - TheTriple.getTriple(), TheCPU, FeaturesStr, +#if JL_LLVM_VERSION < 210000 + TheTriple.getTriple(), +#else + TheTriple, +#endif + TheCPU, FeaturesStr, options, relocmodel, codemodel, @@ -1931,7 +2012,8 @@ JuliaOJIT::JuliaOJIT() MemMgr(createRTDyldMemoryManager()), UnlockedObjectLayer( ES, - [this]() { + [this](auto&&...) { + // LLVM 21+ passes in a memory buffer std::unique_ptr result(new ForwardingMemoryManager(MemMgr)); return result; } @@ -1946,15 +2028,26 @@ JuliaOJIT::JuliaOJIT() OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)) { #ifdef JL_USE_JITLINK -# if defined(LLVM_SHLIB) +# if JL_LLVM_VERSION < 210000 +# if defined(LLVM_SHLIB) // When dynamically linking against LLVM, use our custom EH frame registration code // also used with RTDyld to inform both our and the libc copy of libunwind. auto ehRegistrar = std::make_unique(); -# else +# else auto ehRegistrar = std::make_unique(); -# endif +# endif ObjectLayer.addPlugin(std::make_unique( ES, std::move(ehRegistrar))); +#else + // llvm's EHFrameRegistrationPlugin does not seem to have any customization + // hooks in 21+. Do our own registration with a separate plugin instead. +# if defined(LLVM_SHLIB) + // When dynamically linking against LLVM, use our custom EH frame registration code + // also used with RTDyld to inform both our and the libc copy of libunwind. + ObjectLayer.addPlugin(std::make_unique()); +# endif + ObjectLayer.addPlugin(std::move(EHFrameRegistrationPlugin::Create(ES).get())); +#endif ObjectLayer.addPlugin(std::make_unique()); ObjectLayer.addPlugin(std::make_unique(&jit_bytes_size)); @@ -2105,27 +2198,28 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) TSM = selectOptLevel(std::move(TSM)); TSM = (*Optimizers)(std::move(TSM)); TSM = (*JITPointers)(std::move(TSM)); - auto Lock = TSM.getContext().getLock(); - Module &M = *TSM.getModuleUnlocked(); - - for (auto &f : M) { - if (!f.isDeclaration()){ - jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str()); + auto Obj = withContextDo(TSM.getContext(), [&] (LLVMContext*) { + Module &M = *TSM.getModuleUnlocked(); + for (auto &f : M) { + if (!f.isDeclaration()){ + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str()); + } } - } - // Treat this as if one of the passes might contain a safepoint - // even though that shouldn't be the case and might be unwise - Expected> Obj = CompileLayer.getCompiler()(M); - if (!Obj) { + // Treat this as if one of the passes might contain a safepoint + // even though that shouldn't be the case and might be unwise + auto Obj = CompileLayer.getCompiler()(M); + if (!Obj) { #ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint - ES.reportError(Obj.takeError()); + ES.reportError(Obj.takeError()); #endif - errs() << "Failed to add module to JIT!\n"; - errs() << "Dumping failing module\n" << M << "\n"; + errs() << "Failed to add module to JIT!\n"; + errs() << "Dumping failing module\n" << M << "\n"; + } + return Obj; + }); + if (!Obj) return; - } - { auto release = std::move(Lock); } auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj)); if (Err) { #ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint @@ -2387,7 +2481,11 @@ std::unique_ptr JuliaOJIT::cloneTargetMachine() const { auto NewTM = std::unique_ptr(getTarget() .createTargetMachine( +#if JL_LLVM_VERSION < 210000 getTargetTriple().str(), +#else + getTargetTriple(), +#endif getTargetCPU(), getTargetFeatureString(), getTargetOptions(), diff --git a/src/jitlayers.h b/src/jitlayers.h index 5c1fb10a84205..abf97e9e0e89d 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -1,5 +1,6 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include #include @@ -246,15 +247,32 @@ struct cfunc_decl_t { typedef SmallVector, 0> jl_workqueue_t; +template +auto withContextDo(orc::ThreadSafeContext &tsctx, CB &&cb) +{ +#if JL_LLVM_VERSION < 210000 + auto lock = tsctx.getLock(); + return cb(tsctx.getContext()); +#else + return tsctx.withContextDo(std::forward(cb)); +#endif +} + +template +auto withContextDo(orc::ThreadSafeContext &&tsctx, CB &&cb) +{ + return withContextDo(tsctx, std::forward(cb)); +} + typedef std::list> CallFrames; struct jl_codegen_params_t { orc::ThreadSafeContext tsctx; - orc::ThreadSafeContext::Lock tsctx_lock; + LLVMContext *_ctx{nullptr}; DataLayout DL; Triple TargetTriple; inline LLVMContext &getContext() JL_NOTSAFEPOINT { - return *tsctx.getContext(); + return *_ctx; } typedef StringMap SymMapGV; // outputs @@ -293,9 +311,9 @@ struct jl_codegen_params_t { bool imaging_mode; bool safepoint_on_entry = true; bool use_swiftcc = true; - jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER + jl_codegen_params_t(orc::ThreadSafeContext ctx, + DataLayout DL, Triple triple, std::defer_lock_t) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER : tsctx(std::move(ctx)), - tsctx_lock(tsctx.getLock()), DL(std::move(DL)), TargetTriple(std::move(triple)), imaging_mode(1) @@ -306,8 +324,28 @@ struct jl_codegen_params_t { } jl_codegen_params_t(jl_codegen_params_t &&) JL_NOTSAFEPOINT = default; ~jl_codegen_params_t() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default; + + template + auto withContextDo(CB &&cb) { + return ::withContextDo(tsctx, [&] (LLVMContext *_ctx) { + this->_ctx = _ctx; + auto guard = make_scope_exit([&] { this->_ctx = nullptr; }); + return cb(_ctx); + }); + } }; +template +auto withCodegenParamsDo(orc::ThreadSafeContext ctx, DataLayout DL, + Triple triple, CB &&cb) +{ + jl_codegen_params_t params(std::move(ctx), std::move(DL), + std::move(triple), std::defer_lock); + return params.withContextDo([&] (LLVMContext*) { + return cb(params); + }); +} + const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t ¶ms); jl_llvm_functions_t jl_emit_code( @@ -689,8 +727,9 @@ class JuliaOJIT { extern JuliaOJIT *jl_ExecutionEngine; std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT; inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT { - auto lock = ctx.getLock(); - return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), DL, triple), ctx); + return withContextDo(ctx, [&] (LLVMContext *_ctx) JL_NOTSAFEPOINT { + return orc::ThreadSafeModule(jl_create_llvm_module(name, *_ctx, DL, triple), ctx); + }); } Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT {