diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 7aa29786fa64b..c520904cd1f03 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -541,7 +541,9 @@ Function *IRLinker_copyFunctionProto(Module *DstM, Function *SF) {
   auto *F = Function::Create(SF->getFunctionType(), SF->getLinkage(),
                              SF->getAddressSpace(), SF->getName(), DstM);
   F->copyAttributesFrom(SF);
+#if JL_LLVM_VERSION < 210000
   F->IsNewDbgInfoFormat = SF->IsNewDbgInfoFormat;
+#endif
 
   // Remove these copied constants since they point to the source module.
   F->setPersonalityFn(nullptr);
@@ -796,234 +798,235 @@ void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvm
         return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
     });
     egal_set method_roots;
-    jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second));
-    if (!llvmmod)
-        params.getContext().setDiscardValueNames(true);
-    params.params = &target_cgparams;
-    assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled
-    params.external_linkage = external_linkage;
-    params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
-    bool safepoint_on_entry = params.safepoint_on_entry;
-    JL_GC_PUSH3(&params.temporary_roots, &method_roots.list, &method_roots.keyset);
-    jl_compiled_functions_t compiled_functions;
-    size_t i, l;
-    for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) {
-        // each item in this list is either a CodeInstance followed by a CodeInfo indicating something
-        // to compile, or a rettype followed by a sig describing a C-callable alias to create.
-        jl_value_t *item = jl_array_ptr_ref(codeinfos, i);
-        if (jl_is_code_instance(item)) {
-            // now add it to our compilation results
-            jl_code_instance_t *codeinst = (jl_code_instance_t*)item;
-            jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i);
-            assert(jl_is_code_info(src));
-            if (compiled_functions.count(codeinst))
-                continue; // skip any duplicates that accidentally made there way in here (or make this an error?)
-            if (jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX)
-                params.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable
-            orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)),
-                    params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
-                    Triple(clone.getModuleUnlocked()->getTargetTriple()));
-            jl_llvm_functions_t decls;
-            if (!(params.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
-                decls.functionObject = "jl_fptr_const_return";
-            else
-                decls = jl_emit_codeinst(result_m, codeinst, src, params);
-            params.safepoint_on_entry = safepoint_on_entry;
-            record_method_roots(method_roots, jl_get_ci_mi(codeinst));
-            if (result_m)
-                compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
+    return withCodegenParamsDo(ctxt, std::move(target_info.first), std::move(target_info.second), [&] (jl_codegen_params_t &params) {
+        if (!llvmmod)
+            params.getContext().setDiscardValueNames(true);
+        params.params = &target_cgparams;
+        assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled
+        params.external_linkage = external_linkage;
+        params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
+        bool safepoint_on_entry = params.safepoint_on_entry;
+        JL_GC_PUSH3(&params.temporary_roots, &method_roots.list, &method_roots.keyset);
+        jl_compiled_functions_t compiled_functions;
+        size_t i, l;
+        for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) {
+            // each item in this list is either a CodeInstance followed by a CodeInfo indicating something
+            // to compile, or a rettype followed by a sig describing a C-callable alias to create.
+            jl_value_t *item = jl_array_ptr_ref(codeinfos, i);
+            if (jl_is_code_instance(item)) {
+                // now add it to our compilation results
+                jl_code_instance_t *codeinst = (jl_code_instance_t*)item;
+                jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i);
+                assert(jl_is_code_info(src));
+                if (compiled_functions.count(codeinst))
+                    continue; // skip any duplicates that accidentally made their way in here (or make this an error?)
+                if (jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX)
+                    params.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable
+                orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)),
+                                                                     params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
+                                                                     Triple(clone.getModuleUnlocked()->getTargetTriple()));
+                jl_llvm_functions_t decls;
+                if (!(params.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
+                    decls.functionObject = "jl_fptr_const_return";
+                else
+                    decls = jl_emit_codeinst(result_m, codeinst, src, params);
+                params.safepoint_on_entry = safepoint_on_entry;
+                record_method_roots(method_roots, jl_get_ci_mi(codeinst));
+                if (result_m)
+                    compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
+            }
+            else {
+                assert(jl_is_simplevector(item));
+                jl_value_t *rt = jl_svecref(item, 0);
+                jl_value_t *sig = jl_svecref(item, 1);
+                jl_value_t *nameval = jl_svec_len(item) == 2 ? jl_nothing : jl_svecref(item, 2);
+                assert(jl_is_type(rt) && jl_is_type(sig));
+                jl_generate_ccallable(clone.getModuleUnlocked(), nameval, rt, sig, params);
+            }
         }
-        else {
-            assert(jl_is_simplevector(item));
-            jl_value_t *rt = jl_svecref(item, 0);
-            jl_value_t *sig = jl_svecref(item, 1);
-            jl_value_t *nameval = jl_svec_len(item) == 2 ? jl_nothing : jl_svecref(item, 2);
-            assert(jl_is_type(rt) && jl_is_type(sig));
-            jl_generate_ccallable(clone.getModuleUnlocked(), nameval, rt, sig, params);
-        }
-    }
-    // finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them
-    resolve_workqueue(params, method_roots, compiled_functions);
-    // including generating cfunction thunks
-    generate_cfunc_thunks(params, compiled_functions);
-    aot_optimize_roots(params, method_roots, compiled_functions);
-    params.temporary_roots = nullptr;
-    params.temporary_roots_set.clear();
-    JL_GC_POP();
-
-    // process the globals array, before jl_merge_module destroys them
-    SmallVector<std::string, 0> gvars(params.global_targets.size());
-    data->jl_value_to_llvm.resize(params.global_targets.size());
-    StringSet<> gvars_names;
-    DenseSet<GlobalValue *> gvars_set;
-
-    size_t idx = 0;
-    for (auto &global : params.global_targets) {
-        gvars[idx] = global.second->getName().str();
-        assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!");
-        assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!");
-        data->jl_value_to_llvm[idx] = global.first;
-        idx++;
-    }
-    CreateNativeMethods += compiled_functions.size();
-
-    size_t offset = gvars.size();
-    data->jl_external_to_llvm.resize(params.external_fns.size());
-
-    for (auto &extern_fn : params.external_fns) {
-        jl_code_instance_t *this_code = std::get<0>(extern_fn.first);
-        bool specsig = std::get<1>(extern_fn.first);
-        assert(specsig && "Error external_fns doesn't handle non-specsig yet");
-        (void) specsig;
-        GlobalVariable *F = extern_fn.second;
-        size_t idx = gvars.size() - offset;
-        assert(idx >= 0);
-        assert(idx < data->jl_external_to_llvm.size());
-        data->jl_external_to_llvm[idx] = this_code;
-        assert(gvars_set.insert(F).second && "Duplicate gvar in params!");
-        assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!");
-        gvars.push_back(std::string(F->getName()));
-    }
-
-    // clones the contents of the module `m` to the shadow_output collector
-    // while examining and recording what kind of function pointer we have
-    {
-        Linker L(*clone.getModuleUnlocked());
-        for (auto &def : compiled_functions) {
-            jl_code_instance_t *this_code = def.first;
-            JL_GC_PROMISE_ROOTED(this_code);
-            jl_llvm_functions_t &decls = def.second.decls;
-            StringRef func = decls.functionObject;
-            StringRef cfunc = decls.specFunctionObject;
-            orc::ThreadSafeModule &M = def.second.TSM;
-            if (external_linkage) {
-                uint8_t specsigflags;
-                jl_callptr_t invoke;
-                void *fptr;
-                jl_read_codeinst_invoke(this_code, &specsigflags, &invoke, &fptr, 0);
-                if (invoke != NULL && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) {
-                    // this codeinst is already available externally: keep it only if canPartition demands it for local use
-                    // TODO: for performance, avoid generating the src code when we know it would reach here anyways?
-                    if (M.withModuleDo([&](Module &M) { return !canPartition(*cast<Function>(M.getNamedValue(cfunc))); })) {
-                        jl_merge_module(L, std::move(M));
+        // finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them
+        resolve_workqueue(params, method_roots, compiled_functions);
+        // including generating cfunction thunks
+        generate_cfunc_thunks(params, compiled_functions);
+        aot_optimize_roots(params, method_roots, compiled_functions);
+        params.temporary_roots = nullptr;
+        params.temporary_roots_set.clear();
+        JL_GC_POP();
+
+        // process the globals array, before jl_merge_module destroys them
+        SmallVector<std::string, 0> gvars(params.global_targets.size());
+        data->jl_value_to_llvm.resize(params.global_targets.size());
+        StringSet<> gvars_names;
+        DenseSet<GlobalValue *> gvars_set;
+
+        size_t idx = 0;
+        for (auto &global : params.global_targets) {
+            gvars[idx] = global.second->getName().str();
+            assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!");
+            assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!");
+            data->jl_value_to_llvm[idx] = global.first;
+            idx++;
+        }
+        CreateNativeMethods += compiled_functions.size();
+
+        size_t offset = gvars.size();
+        data->jl_external_to_llvm.resize(params.external_fns.size());
+
+        for (auto &extern_fn : params.external_fns) {
+            jl_code_instance_t *this_code = std::get<0>(extern_fn.first);
+            bool specsig = std::get<1>(extern_fn.first);
+            assert(specsig && "Error external_fns doesn't handle non-specsig yet");
+            (void) specsig;
+            GlobalVariable *F = extern_fn.second;
+            size_t idx = gvars.size() - offset;
+            assert(idx >= 0);
+            assert(idx < data->jl_external_to_llvm.size());
+            data->jl_external_to_llvm[idx] = this_code;
+            assert(gvars_set.insert(F).second && "Duplicate gvar in params!");
+            assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!");
+            gvars.push_back(std::string(F->getName()));
+        }
+
+        // clones the contents of the module `m` to the shadow_output collector
+        // while examining and recording what kind of function pointer we have
+        {
+            Linker L(*clone.getModuleUnlocked());
+            for (auto &def : compiled_functions) {
+                jl_code_instance_t *this_code = def.first;
+                JL_GC_PROMISE_ROOTED(this_code);
+                jl_llvm_functions_t &decls = def.second.decls;
+                StringRef func = decls.functionObject;
+                StringRef cfunc = decls.specFunctionObject;
+                orc::ThreadSafeModule &M = def.second.TSM;
+                if (external_linkage) {
+                    uint8_t specsigflags;
+                    jl_callptr_t invoke;
+                    void *fptr;
+                    jl_read_codeinst_invoke(this_code, &specsigflags, &invoke, &fptr, 0);
+                    if (invoke != NULL && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) {
+                        // this codeinst is already available externally: keep it only if canPartition demands it for local use
+                        // TODO: for performance, avoid generating the src code when we know it would reach here anyways?
+                        if (M.withModuleDo([&](Module &M) { return !canPartition(*cast<Function>(M.getNamedValue(cfunc))); })) {
+                            jl_merge_module(L, std::move(M));
+                        }
+                        continue;
                     }
-                    continue;
                 }
+                jl_merge_module(L, std::move(M));
+                uint32_t func_id = 0;
+                uint32_t cfunc_id = 0;
+                if (func == "jl_fptr_args") {
+                    func_id = -1;
+                }
+                else if (func == "jl_fptr_sparam") {
+                    func_id = -2;
+                }
+                else if (func == "jl_f_opaque_closure_call") {
+                    func_id = -4;
+                }
+                else if (func == "jl_fptr_const_return") {
+                    func_id = -5;
+                }
+                else {
+                    //Safe b/c context is locked by params
+                    data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(func)));
+                    func_id = data->jl_sysimg_fvars.size();
+                }
+                if (!cfunc.empty()) {
+                    //Safe b/c context is locked by params
+                    data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(cfunc)));
+                    cfunc_id = data->jl_sysimg_fvars.size();
+                }
+                data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id);
             }
-            jl_merge_module(L, std::move(M));
-            uint32_t func_id = 0;
-            uint32_t cfunc_id = 0;
-            if (func == "jl_fptr_args") {
-                func_id = -1;
-            }
-            else if (func == "jl_fptr_sparam") {
-                func_id = -2;
-            }
-            else if (func == "jl_f_opaque_closure_call") {
-                func_id = -4;
-            }
-            else if (func == "jl_fptr_const_return") {
-                func_id = -5;
-            }
-            else {
-                //Safe b/c context is locked by params
-                data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(func)));
-                func_id = data->jl_sysimg_fvars.size();
-            }
-            if (!cfunc.empty()) {
-                //Safe b/c context is locked by params
-                data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(cfunc)));
-                cfunc_id = data->jl_sysimg_fvars.size();
+            bool Changed = true;
+            while (Changed) {
+                Changed = false;
+                // make sure everything referenced got included though, since some functions aren't
+                // correctly implemented by staticdata for external use, and so codegen won't emit
+                // an external reference but expects a private copy here instead
+                for (auto &def : compiled_functions) {
+                    orc::ThreadSafeModule &M = def.second.TSM;
+                    if (!M)
+                        continue;
+                    jl_llvm_functions_t &decls = def.second.decls;
+                    StringRef func = decls.functionObject;
+                    StringRef cfunc = decls.specFunctionObject;
+                    if (func != "jl_fptr_args" &&
+                        func != "jl_fptr_sparam" &&
+                        func != "jl_f_opaque_closure_call" &&
+                        clone.getModuleUnlocked()->getNamedValue(func)) {
+                        jl_merge_module(L, std::move(M));
+                        Changed = true;
+                        continue;
+                    }
+                    if (!cfunc.empty() && clone.getModuleUnlocked()->getNamedValue(cfunc)) {
+                        Changed = true;
+                        jl_merge_module(L, std::move(M));
+                    }
+                }
             }
-            data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id);
-        }
-        bool Changed = true;
-        while (Changed) {
-            Changed = false;
-            // make sure everything referenced got included though, since some functions aren't
-            // correctly implemented by staticdata for external use, and so codegen won't emit
-            // an external reference but expects a private copy here instead
+#ifndef NDEBUG
+            // make sure we didn't forget anything that we promised to include in here
             for (auto &def : compiled_functions) {
-                orc::ThreadSafeModule &M = def.second.TSM;
-                if (!M)
-                    continue;
                 jl_llvm_functions_t &decls = def.second.decls;
                 StringRef func = decls.functionObject;
                 StringRef cfunc = decls.specFunctionObject;
                 if (func != "jl_fptr_args" &&
                     func != "jl_fptr_sparam" &&
-                    func != "jl_f_opaque_closure_call" &&
-                    clone.getModuleUnlocked()->getNamedValue(func)) {
-                    jl_merge_module(L, std::move(M));
-                    Changed = true;
-                    continue;
+                    func != "jl_f_opaque_closure_call") {
+                    GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(func);
+                    assert(!F || !F->isDeclaration());
                 }
-                if (!cfunc.empty() && clone.getModuleUnlocked()->getNamedValue(cfunc)) {
-                    Changed = true;
-                    jl_merge_module(L, std::move(M));
+                if (!cfunc.empty()) {
+                    GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(cfunc);
+                    assert(!F || !F->isDeclaration());
                 }
             }
-        }
-#ifndef NDEBUG
-        // make sure we didn't forget anything that we promised to include in here
-        for (auto &def : compiled_functions) {
-            jl_llvm_functions_t &decls = def.second.decls;
-            StringRef func = decls.functionObject;
-            StringRef cfunc = decls.specFunctionObject;
-            if (func != "jl_fptr_args" &&
-                func != "jl_fptr_sparam" &&
-                func != "jl_f_opaque_closure_call") {
-                GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(func);
-                assert(!F || !F->isDeclaration());
-            }
-            if (!cfunc.empty()) {
-                GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(cfunc);
-                assert(!F || !F->isDeclaration());
-            }
-        }
 #endif
-        compiled_functions.clear();
-        if (params._shared_module) {
-            bool error = L.linkInModule(std::move(params._shared_module));
-            assert(!error && "Error linking in shared module");
-            (void)error;
-        }
-    }
-
-    // now get references to the globals in the merged module
-    // and set them to be internalized and initialized at startup
-    // filter out any gvars that got optimized away
-    idx = 0;
-    size_t newoffset = 0;
-    size_t newidx = 0;
-    for (auto &global : gvars) {
-        //Safe b/c context is locked by params
-        GlobalVariable *G = cast_or_null<GlobalVariable>(clone.getModuleUnlocked()->getNamedValue(global));
-        if (G != nullptr) {
-            assert(!G->hasInitializer());
-            G->setInitializer(Constant::getNullValue(G->getValueType()));
-            G->setLinkage(GlobalValue::InternalLinkage);
-            G->setDSOLocal(true);
-            assert(newidx == data->jl_sysimg_gvars.size());
-            if (idx < offset) {
-                data->jl_value_to_llvm[newidx] = data->jl_value_to_llvm[idx];
-                newoffset = newidx + 1;
+            compiled_functions.clear();
+            if (params._shared_module) {
+                bool error = L.linkInModule(std::move(params._shared_module));
+                assert(!error && "Error linking in shared module");
+                (void)error;
             }
-            else {
-                data->jl_external_to_llvm[newidx - newoffset] = data->jl_external_to_llvm[idx - offset];
+        }
+
+        // now get references to the globals in the merged module
+        // and set them to be internalized and initialized at startup
+        // filter out any gvars that got optimized away
+        idx = 0;
+        size_t newoffset = 0;
+        size_t newidx = 0;
+        for (auto &global : gvars) {
+            //Safe b/c context is locked by params
+            GlobalVariable *G = cast_or_null<GlobalVariable>(clone.getModuleUnlocked()->getNamedValue(global));
+            if (G != nullptr) {
+                assert(!G->hasInitializer());
+                G->setInitializer(Constant::getNullValue(G->getValueType()));
+                G->setLinkage(GlobalValue::InternalLinkage);
+                G->setDSOLocal(true);
+                assert(newidx == data->jl_sysimg_gvars.size());
+                if (idx < offset) {
+                    data->jl_value_to_llvm[newidx] = data->jl_value_to_llvm[idx];
+                    newoffset = newidx + 1;
+                }
+                else {
+                    data->jl_external_to_llvm[newidx - newoffset] = data->jl_external_to_llvm[idx - offset];
+                }
+                data->jl_sysimg_gvars.push_back(G);
+                newidx++;
             }
-            data->jl_sysimg_gvars.push_back(G);
-            newidx++;
+            idx++;
         }
-        idx++;
-    }
-    data->jl_value_to_llvm.resize(newoffset);
-    data->jl_external_to_llvm.resize(newidx - newoffset);
-    gvars.clear();
-    CreateNativeGlobals += idx;
+        data->jl_value_to_llvm.resize(newoffset);
+        data->jl_external_to_llvm.resize(newidx - newoffset);
+        gvars.clear();
+        CreateNativeGlobals += idx;
 
-    data->M = std::move(clone);
-    return (void*)data;
+        data->M = std::move(clone);
+        return (void*)data;
+    });
 }
 
 static object::Archive::Kind getDefaultForHost(Triple &triple)
@@ -1533,7 +1536,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
     AOTOutputs out;
     auto TM = std::unique_ptr<TargetMachine>(
         SourceTM.getTarget().createTargetMachine(
+#if JL_LLVM_VERSION < 210000
             SourceTM.getTargetTriple().str(),
+#else
+            SourceTM.getTargetTriple(),
+#endif
             SourceTM.getTargetCPU(),
             SourceTM.getTargetFeatureString(),
             SourceTM.Options,
@@ -1561,7 +1568,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
 
         auto PMTM = std::unique_ptr<TargetMachine>(
             SourceTM.getTarget().createTargetMachine(
+#if JL_LLVM_VERSION < 210000
                 SourceTM.getTargetTriple().str(),
+#else
+                SourceTM.getTargetTriple(),
+#endif
                 SourceTM.getTargetCPU(),
                 SourceTM.getTargetFeatureString(),
                 SourceTM.Options,
@@ -1835,71 +1846,156 @@ static void construct_vars(Module &M, Partition &partition, StringRef suffix) {
     gidxs_var->setDSOLocal(true);
 }
 
-extern "C" void lambda_trampoline(void* arg) {
-    std::function<void()>* func = static_cast<std::function<void()>*>(arg);
-    (*func)();
-    delete func;
+template<typename CB> // CB: type of the callable; it is invoked with no arguments
+static inline void schedule_uv_thread(uv_thread_t *worker, CB &&cb) // starts `worker` and hands it `cb` to run
+{
+    auto func = new CB(std::move(cb)); // heap copy so the callable outlives this call; deleted by the lambda below
+    // Use libuv thread to avoid issues with stack sizes
+    uv_thread_create(worker, [] (void *arg) { // capture-less lambda is the thread entry; `arg` is the `func` pointer passed below
+        auto func = static_cast<CB*>(arg); // shadows the outer `func`; same heap object
+        (*func)();
+        delete func; // freed only after the callable has returned
+    }, func); // NOTE(review): the return value of uv_thread_create is ignored here
 }
 
 // Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading,
 // as well as partitioning, serialization, and deserialization.
-template<typename ModuleReleasedFunc>
-static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, StringRef name, unsigned threads,
-                bool unopt_out, bool opt_out, bool obj_out, bool asm_out, ModuleReleasedFunc module_released) {
-    SmallVector<AOTOutputs, 16> outputs(threads);
-    assert(threads);
-    assert(unopt_out || opt_out || obj_out || asm_out);
-    // Timers for timing purposes
-    TimerGroup timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str());
-    SmallVector<ShardTimers, 1> timers(threads);
-    for (unsigned i = 0; i < threads; ++i) {
-        auto idx = std::to_string(i);
-        timers[i].name = "shard_" + idx;
-        timers[i].desc = ("Timings for " + name + " module shard " + idx).str();
-        timers[i].deserialize.init("deserialize_" + idx, "Deserialize module");
-        timers[i].materialize.init("materialize_" + idx, "Materialize declarations");
-        timers[i].construct.init("construct_" + idx, "Construct partitioned definitions");
-        timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode");
-        timers[i].optimize.init("optimize_" + idx, "Optimize shard");
-        timers[i].opt.init("opt_" + idx, "Emit optimized bitcode");
-        timers[i].obj.init("obj_" + idx, "Emit object file");
-        timers[i].asm_.init("asm_" + idx, "Emit assembly file");
-    }
-    Timer partition_timer("partition", "Partition module", timer_group);
-    Timer serialize_timer("serialize", "Serialize module", timer_group);
-    Timer output_timer("output", "Add outputs", timer_group);
-    bool report_timings = false;
-    if (auto env = getenv("JULIA_IMAGE_TIMINGS")) {
-        char *endptr;
-        unsigned long val = strtoul(env, &endptr, 10);
-        if (endptr != env && !*endptr && val <= 1) {
-            report_timings = val;
-        } else {
-            if (StringRef("true").compare_insensitive(env) == 0)
-                report_timings = true;
-            else if (StringRef("false").compare_insensitive(env) == 0)
-                report_timings = false;
-            else
-                errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n";
+
+// This is more or less a manual co-routine version of add_output
+// which allows exiting calling scope when the module is released.
+struct OutputAdder {
+    OutputAdder(Module &M, TargetMachine &TM, StringRef name, unsigned threads,
+                bool unopt_out, bool opt_out, bool obj_out, bool asm_out)
+        : TM(TM), threads(threads), unopt_out(unopt_out),
+          opt_out(opt_out), obj_out(obj_out), asm_out(asm_out),
+          timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str())
+    { // Does every step that reads M; the part of finish() visible here never touches M
+        assert(threads);
+        assert(unopt_out || opt_out || obj_out || asm_out);
+        for (unsigned i = 0; i < threads; ++i) { // name and describe the timers of each shard
+            auto idx = std::to_string(i);
+            timers[i].name = "shard_" + idx; // NOTE(review): assumes `timers` already holds `threads` entries - its declaration is not visible here
+            timers[i].desc = ("Timings for " + name + " module shard " + idx).str();
+            timers[i].deserialize.init("deserialize_" + idx, "Deserialize module");
+            timers[i].materialize.init("materialize_" + idx, "Materialize declarations");
+            timers[i].construct.init("construct_" + idx, "Construct partitioned definitions");
+            timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode");
+            timers[i].optimize.init("optimize_" + idx, "Optimize shard");
+            timers[i].opt.init("opt_" + idx, "Emit optimized bitcode");
+            timers[i].obj.init("obj_" + idx, "Emit object file");
+            timers[i].asm_.init("asm_" + idx, "Emit assembly file");
+        }
+        if (auto env = getenv("JULIA_IMAGE_TIMINGS")) { // accepted values: 0/1 or true/false (case-insensitive)
+            char *endptr;
+            unsigned long val = strtoul(env, &endptr, 10);
+            if (endptr != env && !*endptr && val <= 1) { // the whole string parsed as a number and it is 0 or 1
+                report_timings = val;
+            } else {
+                if (StringRef("true").compare_insensitive(env) == 0)
+                    report_timings = true;
+                else if (StringRef("false").compare_insensitive(env) == 0)
+                    report_timings = false;
+                else
+                    errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n"; // report_timings is left as it was
+            }
+        }
+        // Single-threaded case: compile M right away and keep the result in output0 for finish()
+        if (threads == 1) {
+            output_timer.startTimer();
+            {
+                JL_TIMING(NATIVE_AOT, NATIVE_Opt);
+                // convert gvars to the expected offset table format for shard 0
+                if (M.getGlobalVariable("jl_gvars")) {
+                    auto gvars = consume_gv<Constant>(M, "jl_gvars", false);
+                    Type *T_size = M.getDataLayout().getIntPtrType(M.getContext());
+                    emit_offset_table(M, T_size, gvars, "jl_gvar", "_0"); // module flag "julia.mv.suffix"
+                    M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs_0");
+                }
+                output0 = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out);
+            }
+            output_timer.stopTimer();
+            return; // nothing is partitioned or serialized for a single shard
+        }
+
+        partition_timer.startTimer(); // multi-threaded case: only partition and serialize here; finish() starts the workers
+        uint64_t counter = 0;
+        // Partitioning requires all globals to have names.
+        // We use a prefix to avoid name conflicts with user code.
+        for (auto &G : M.global_values()) {
+            if (!G.isDeclaration() && !G.hasName()) {
+                G.setName("jl_ext_" + Twine(counter++));
+            }
         }
+        partitions = partitionModule(M, threads);
+        partition_timer.stopTimer();
+
+        serialize_timer.startTimer();
+        serialized = serializeModule(M); // the worker threads in finish() deserialize from this buffer
+        serialize_timer.stopTimer();
     }
-    // Single-threaded case
-    if (threads == 1) {
+
+    auto finish()
+    {
+        SmallVector<AOTOutputs, 16> outputs(threads);
+        if (threads == 1) {
+            outputs[0] = std::move(output0);
+            if (!report_timings) {
+                timer_group.clear();
+            } else {
+                timer_group.print(dbgs(), true);
+                for (auto &t : timers) {
+                    t.print(dbgs(), true);
+                }
+            }
+            return outputs;
+        }
         output_timer.startTimer();
+
+        // Start all of the worker threads
         {
             JL_TIMING(NATIVE_AOT, NATIVE_Opt);
-            // convert gvars to the expected offset table format for shard 0
-            if (M.getGlobalVariable("jl_gvars")) {
-                auto gvars = consume_gv<Constant>(M, "jl_gvars", false);
-                Type *T_size = M.getDataLayout().getIntPtrType(M.getContext());
-                emit_offset_table(M, T_size, gvars, "jl_gvar", "_0"); // module flag "julia.mv.suffix"
-                M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs_0");
+            std::vector<uv_thread_t> workers(threads);
+            for (unsigned i = 0; i < threads; i++) {
+                schedule_uv_thread(&workers[i], [&, i]() {
+                    LLVMContext ctx;
+                    ctx.setDiscardValueNames(true);
+                    // Lazily deserialize the entire module
+                    timers[i].deserialize.startTimer();
+                    auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx);
+                    // Make sure this also fails with only julia, but not LLVM assertions enabled,
+                    // otherwise, the first error we hit is the LLVM module verification failure,
+                    // which will look very confusing, because the module was partially deserialized.
+                    bool deser_succeeded = (bool)EM;
+                    auto M = cantFail(std::move(EM), "Error loading module");
+                    assert(deser_succeeded); (void)deser_succeeded;
+                    timers[i].deserialize.stopTimer();
+
+                    timers[i].materialize.startTimer();
+                    materializePreserved(*M, partitions[i]);
+                    timers[i].materialize.stopTimer();
+
+                    timers[i].construct.startTimer();
+                    std::string suffix = "_" + std::to_string(i);
+                    construct_vars(*M, partitions[i], suffix);
+                    M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), suffix));
+                    // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
+                    // or it may skip emitting debug info for that file. Here set it to ./julia#N
+                    DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), ".");
+                    if (M->getNamedMetadata("llvm.dbg.cu"))
+                        for (auto CU: M->getNamedMetadata("llvm.dbg.cu")->operands())
+                            CU->replaceOperandWith(0, topfile);
+                    timers[i].construct.stopTimer();
+
+                    outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out);
+                });
             }
-            outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out);
+
+            // Wait for all of the worker threads to finish
+            for (unsigned i = 0; i < threads; i++)
+                uv_thread_join(&workers[i]);
         }
+
         output_timer.stopTimer();
-        // Don't need M anymore
-        module_released(M);
 
         if (!report_timings) {
             timer_group.clear();
@@ -1908,99 +2004,37 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
             for (auto &t : timers) {
                 t.print(dbgs(), true);
             }
+            dbgs() << "Partition weights: [";
+            bool comma = false;
+            for (auto &p : partitions) {
+                if (comma)
+                    dbgs() << ", ";
+                else
+                    comma = true;
+                dbgs() << p.weight;
+            }
+            dbgs() << "]\n";
         }
         return outputs;
     }
 
-    partition_timer.startTimer();
-    uint64_t counter = 0;
-    // Partitioning requires all globals to have names.
-    // We use a prefix to avoid name conflicts with user code.
-    for (auto &G : M.global_values()) {
-        if (!G.isDeclaration() && !G.hasName()) {
-            G.setName("jl_ext_" + Twine(counter++));
-        }
-    }
-    auto partitions = partitionModule(M, threads);
-    partition_timer.stopTimer();
-
-    serialize_timer.startTimer();
-    auto serialized = serializeModule(M);
-    serialize_timer.stopTimer();
-
-    // Don't need M anymore, since we'll only read from serialized from now on
-    module_released(M);
-
-    output_timer.startTimer();
-
-    // Start all of the worker threads
-    {
-        JL_TIMING(NATIVE_AOT, NATIVE_Opt);
-        std::vector<uv_thread_t> workers(threads);
-        for (unsigned i = 0; i < threads; i++) {
-            std::function<void()> func = [&, i]() {
-                LLVMContext ctx;
-                ctx.setDiscardValueNames(true);
-                // Lazily deserialize the entire module
-                timers[i].deserialize.startTimer();
-                auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx);
-                // Make sure this also fails with only julia, but not LLVM assertions enabled,
-                // otherwise, the first error we hit is the LLVM module verification failure,
-                // which will look very confusing, because the module was partially deserialized.
-                bool deser_succeeded = (bool)EM;
-                auto M = cantFail(std::move(EM), "Error loading module");
-                assert(deser_succeeded); (void)deser_succeeded;
-                timers[i].deserialize.stopTimer();
-
-                timers[i].materialize.startTimer();
-                materializePreserved(*M, partitions[i]);
-                timers[i].materialize.stopTimer();
-
-                timers[i].construct.startTimer();
-                std::string suffix = "_" + std::to_string(i);
-                construct_vars(*M, partitions[i], suffix);
-                M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), suffix));
-                // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
-                // or it may skip emitting debug info for that file. Here set it to ./julia#N
-                DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), ".");
-                if (M->getNamedMetadata("llvm.dbg.cu"))
-                    for (auto CU: M->getNamedMetadata("llvm.dbg.cu")->operands())
-                        CU->replaceOperandWith(0, topfile);
-                timers[i].construct.stopTimer();
-
-                outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out);
-            };
-            auto arg = new std::function<void()>(func);
-            uv_thread_create(&workers[i], lambda_trampoline, arg); // Use libuv thread to avoid issues with stack sizes
-        }
-
-        // Wait for all of the worker threads to finish
-        for (unsigned i = 0; i < threads; i++)
-            uv_thread_join(&workers[i]);
-    }
-
-    output_timer.stopTimer();
-
-    if (!report_timings) {
-        timer_group.clear();
-    } else {
-        timer_group.print(dbgs(), true);
-        for (auto &t : timers) {
-            t.print(dbgs(), true);
-        }
-        dbgs() << "Partition weights: [";
-        bool comma = false;
-        for (auto &p : partitions) {
-            if (comma)
-                dbgs() << ", ";
-            else
-                comma = true;
-            dbgs() << p.weight;
-        }
-        dbgs() << "]\n";
-    }
-    return outputs;
-}
+    TargetMachine &TM;
+    unsigned threads;
+    bool unopt_out;
+    bool opt_out;
+    bool obj_out;
+    bool asm_out;
+    AOTOutputs output0;
+    // Timers for timing purposes
+    TimerGroup timer_group;
+    SmallVector<ShardTimers, 1> timers{threads};
+    Timer partition_timer{"partition", "Partition module", timer_group};
+    Timer serialize_timer{"serialize", "Serialize module", timer_group};
+    Timer output_timer{"output", "Add outputs", timer_group};
+    bool report_timings{false};
+    SmallVector<Partition, 32> partitions;
+    SmallVector<char, 0> serialized;
+};
 
 extern int jl_is_timing_passes;
 static unsigned compute_image_thread_count(const ModuleInfo &info) {
@@ -2113,7 +2147,11 @@ void jl_dump_native_impl(void *native_code,
     }
     std::unique_ptr<TargetMachine> SourceTM(
         jl_ExecutionEngine->getTarget().createTargetMachine(
+#if JL_LLVM_VERSION < 210000
             TheTriple.getTriple(),
+#else
+            TheTriple,
+#endif
             jl_ExecutionEngine->getTargetCPU(),
             jl_ExecutionEngine->getTargetFeatureString(),
             jl_ExecutionEngine->getTargetOptions(),
@@ -2134,8 +2172,8 @@ void jl_dump_native_impl(void *native_code,
         OverrideStackAlignment = M.getOverrideStackAlignment();
     });
 
-    auto compile = [&](Module &M, StringRef name, unsigned threads, auto module_released) {
-        return add_output(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, module_released);
+    auto start_compile = [&](Module &M, StringRef name, unsigned threads) {
+        return OutputAdder(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname);
     };
 
     SmallVector<AOTOutputs, 16> sysimg_outputs;
@@ -2146,7 +2184,11 @@ void jl_dump_native_impl(void *native_code,
         LLVMContext Context;
         Context.setDiscardValueNames(true);
         Module sysimgM("sysimg", Context);
+#if JL_LLVM_VERSION < 210000
         sysimgM.setTargetTriple(TheTriple.str());
+#else
+        sysimgM.setTargetTriple(TheTriple);
+#endif
         sysimgM.setDataLayout(DL);
         sysimgM.setStackProtectorGuard(StackProtectorGuard);
         sysimgM.setOverrideStackAlignment(OverrideStackAlignment);
@@ -2200,7 +2242,7 @@ void jl_dump_native_impl(void *native_code,
         // Note that we don't set z to null, this allows the check in WRITE_ARCHIVE
         // to function as expected
         // no need to free the module/context, destructor handles that
-        sysimg_outputs = compile(sysimgM, "sysimg", 1, [](Module &) {});
+        sysimg_outputs = start_compile(sysimgM, "sysimg", 1).finish();
     }
 
     const bool imaging_mode = true;
@@ -2214,7 +2256,11 @@ void jl_dump_native_impl(void *native_code,
 
     data->M.withModuleDo([&](Module &dataM) {
         JL_TIMING(NATIVE_AOT, NATIVE_Setup);
+#if JL_LLVM_VERSION < 210000
         dataM.setTargetTriple(TheTriple.str());
+#else
+        dataM.setTargetTriple(TheTriple);
+#endif
         dataM.setDataLayout(DL);
         dataM.setPICLevel(PICLevel::BigPIC);
         auto &Context = dataM.getContext();
@@ -2295,19 +2341,13 @@ void jl_dump_native_impl(void *native_code,
     });
 
     {
-        // Don't use withModuleDo here since we delete the TSM midway through
-        auto TSCtx = data->M.getContext();
-        auto lock = TSCtx.getLock();
-        auto dataM = data->M.getModuleUnlocked();
-
-        data_outputs = compile(*dataM, "text", threads, [data, &lock, &TSCtx](Module &) {
-            // Delete data when add_output thinks it's done with it
-            // Saves memory for use when multithreading
-            auto lock2 = std::move(lock);
-            delete data;
-            // Drop last reference to shared LLVM::Context
-            auto TSCtx2 = std::move(TSCtx);
+        auto adder = data->M.withModuleDo([&] (auto &dataM) {
+            return start_compile(dataM, "text", threads);
         });
+        // Delete data once start_compile has handed back the adder, before finish() runs
+        // Saves memory for use when multithreading
+        delete data;
+        data_outputs = adder.finish();
     }
 
     if (params->emit_metadata) {
@@ -2315,7 +2355,11 @@ void jl_dump_native_impl(void *native_code,
         LLVMContext Context;
         Context.setDiscardValueNames(true);
         Module metadataM("metadata", Context);
+#if JL_LLVM_VERSION < 210000
         metadataM.setTargetTriple(TheTriple.str());
+#else
+        metadataM.setTargetTriple(TheTriple);
+#endif
         metadataM.setDataLayout(DL);
         metadataM.setStackProtectorGuard(StackProtectorGuard);
         metadataM.setOverrideStackAlignment(OverrideStackAlignment);
@@ -2406,7 +2450,7 @@ void jl_dump_native_impl(void *native_code,
         }
 
         // no need to free module/context, destructor handles that
-        metadata_outputs = compile(metadataM, "data", 1, [](Module &) {});
+        metadata_outputs = start_compile(metadataM, "data", 1).finish();
     }
 
     {
@@ -2514,88 +2558,89 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_
             uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
             if (measure_compile_time_enabled)
                 compiler_start_time = jl_hrtime();
-            jl_codegen_params_t output(ctx, DL, TT);
-            output.params = &params;
-            output.imaging_mode = jl_options.image_codegen;
-            output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
-            JL_GC_PUSH1(&output.temporary_roots);
-            jl_llvm_functions_t decls = jl_emit_code(m, mi, src, mi->specTypes, src->rettype, output);
-            // while not required, also emit the cfunc thunks, based on the
-            // inferred ABIs of their targets in the current latest world,
-            // since otherwise it is challenging to see all relevant codes
-            jl_compiled_functions_t compiled_functions;
-            size_t latestworld = jl_atomic_load_acquire(&jl_world_counter);
-            for (cfunc_decl_t &cfunc : output.cfuncs) {
-                jl_value_t *sigt = cfunc.abi.sigt;
-                JL_GC_PROMISE_ROOTED(sigt);
-                jl_value_t *mi = jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0);
-                if (mi == jl_nothing)
-                    continue;
-                jl_code_instance_t *codeinst = jl_type_infer((jl_method_instance_t*)mi, latestworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim);
-                if (codeinst == nullptr || compiled_functions.count(codeinst))
-                    continue;
-                orc::ThreadSafeModule decl_m = jl_create_ts_module("extern", ctx, DL, TT);
-                jl_llvm_functions_t decls;
-                if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
-                    decls.functionObject = "jl_fptr_const_return";
-                else
-                    decls = jl_emit_codedecls(decl_m, codeinst, output);
-                compiled_functions[codeinst] = {std::move(decl_m), std::move(decls)};
-            }
-            generate_cfunc_thunks(output, compiled_functions);
-            emit_always_inline(m, output);
-            output.workqueue.clear();
-            compiled_functions.clear();
-            output.temporary_roots = nullptr;
-            JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it
-
-            if (m) {
-                // if compilation succeeded, prepare to return the result
-                // Similar to jl_link_global from jitlayers.cpp,
-                // so that code_llvm shows similar codegen to the jit
-                for (auto &global : output.global_targets) {
-                    if (jl_options.image_codegen) {
-                        global.second->setLinkage(GlobalValue::ExternalLinkage);
+            withCodegenParamsDo(ctx, DL, TT, [&] (jl_codegen_params_t &output) {
+                output.params = &params;
+                output.imaging_mode = jl_options.image_codegen;
+                output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
+                JL_GC_PUSH1(&output.temporary_roots);
+                jl_llvm_functions_t decls = jl_emit_code(m, mi, src, mi->specTypes, src->rettype, output);
+                // while not required, also emit the cfunc thunks, based on the
+                // inferred ABIs of their targets in the current latest world,
+                // since otherwise it is challenging to see all relevant codes
+                jl_compiled_functions_t compiled_functions;
+                size_t latestworld = jl_atomic_load_acquire(&jl_world_counter);
+                for (cfunc_decl_t &cfunc : output.cfuncs) {
+                    jl_value_t *sigt = cfunc.abi.sigt;
+                    JL_GC_PROMISE_ROOTED(sigt);
+                    jl_value_t *mi = jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0);
+                    if (mi == jl_nothing)
+                        continue;
+                    jl_code_instance_t *codeinst = jl_type_infer((jl_method_instance_t*)mi, latestworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim);
+                    if (codeinst == nullptr || compiled_functions.count(codeinst))
+                        continue;
+                    orc::ThreadSafeModule decl_m = jl_create_ts_module("extern", ctx, DL, TT);
+                    jl_llvm_functions_t decls;
+                    if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
+                        decls.functionObject = "jl_fptr_const_return";
+                    else
+                        decls = jl_emit_codedecls(decl_m, codeinst, output);
+                    compiled_functions[codeinst] = {std::move(decl_m), std::move(decls)};
+                }
+                generate_cfunc_thunks(output, compiled_functions);
+                emit_always_inline(m, output);
+                output.workqueue.clear();
+                compiled_functions.clear();
+                output.temporary_roots = nullptr;
+                JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it
+
+                if (m) {
+                    // if compilation succeeded, prepare to return the result
+                    // Similar to jl_link_global from jitlayers.cpp,
+                    // so that code_llvm shows similar codegen to the jit
+                    for (auto &global : output.global_targets) {
+                        if (jl_options.image_codegen) {
+                            global.second->setLinkage(GlobalValue::ExternalLinkage);
+                        }
+                        else {
+                            auto p = literal_static_pointer_val(global.first, global.second->getValueType());
+                            Type *elty = PointerType::get(p->getContext(), 0);
+                            // For pretty printing, when LLVM inlines the global initializer into its loads
+                            auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent());
+                            global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType()));
+                            global.second->setConstant(true);
+                            global.second->setLinkage(GlobalValue::PrivateLinkage);
+                            global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+                            global.second->setVisibility(GlobalValue::DefaultVisibility);
+                        }
                     }
-                    else {
-                        auto p = literal_static_pointer_val(global.first, global.second->getValueType());
-                        Type *elty = PointerType::get(p->getContext(), 0);
-                        // For pretty printing, when LLVM inlines the global initializer into its loads
-                        auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent());
-                        global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType()));
-                        global.second->setConstant(true);
-                        global.second->setLinkage(GlobalValue::PrivateLinkage);
-                        global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
-                        global.second->setVisibility(GlobalValue::DefaultVisibility);
+                    if (!jl_options.image_codegen) {
+                        optimizeDLSyms(*m.getModuleUnlocked());
                     }
-                }
-                if (!jl_options.image_codegen) {
-                    optimizeDLSyms(*m.getModuleUnlocked());
-                }
-                assert(!verifyLLVMIR(*m.getModuleUnlocked()));
-                if (optimize) {
-                    auto opts = OptimizationOptions::defaults();
-                    opts.sanitize_memory = params.sanitize_memory;
-                    opts.sanitize_thread = params.sanitize_thread;
-                    opts.sanitize_address = params.sanitize_address;
-                    NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level), opts};
-                    //Safe b/c context lock is held by output
-                    PM.run(*m.getModuleUnlocked());
                     assert(!verifyLLVMIR(*m.getModuleUnlocked()));
+                    if (optimize) {
+                        auto opts = OptimizationOptions::defaults();
+                        opts.sanitize_memory = params.sanitize_memory;
+                        opts.sanitize_thread = params.sanitize_thread;
+                        opts.sanitize_address = params.sanitize_address;
+                        NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level), opts};
+                        //Safe b/c context lock is held by output
+                        PM.run(*m.getModuleUnlocked());
+                        assert(!verifyLLVMIR(*m.getModuleUnlocked()));
+                    }
+                    const std::string *fname;
+                    if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam")
+                        getwrapper = false;
+                    if (!getwrapper)
+                        fname = &decls.specFunctionObject;
+                    else
+                        fname = &decls.functionObject;
+                    F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
                 }
-                const std::string *fname;
-                if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam")
-                    getwrapper = false;
-                if (!getwrapper)
-                    fname = &decls.specFunctionObject;
-                else
-                    fname = &decls.functionObject;
-                F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
-            }
-            if (measure_compile_time_enabled) {
-                auto end = jl_hrtime();
-                jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
-            }
+                if (measure_compile_time_enabled) {
+                    auto end = jl_hrtime();
+                    jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
+                }
+            });
         }
         if (F) {
             dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m)));
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 62f31e237f4b6..86e57f7208c69 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -1009,8 +1009,13 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     // above problem won't be as serious.
 
     auto merged_ai = dst_ai.merge(src_ai);
+#if JL_LLVM_VERSION < 210000
     ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
+#else
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
+                             merged_ai.toAAMDNodes());
+#endif
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
@@ -1023,8 +1028,13 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     ++EmittedMemcpys;
 
     auto merged_ai = dst_ai.merge(src_ai);
+#if JL_LLVM_VERSION < 210000
     ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
+#else
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
+                             merged_ai.toAAMDNodes());
+#endif
 }
 
 template<typename T1>
diff --git a/src/codegen.cpp b/src/codegen.cpp
index e145c73dc6c38..1d71b152e89f0 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -605,6 +605,24 @@ AttributeSet Attributes(LLVMContext &C, std::initializer_list<Attribute::AttrKin
     return AttributeSet::get(C, ArrayRef<Attribute>(attrs));
 }
 
+static inline Attribute NoCaptureAttr(LLVMContext &C) // returns the "no capture" Attribute for context C, spelled per LLVM version
+{
+#if JL_LLVM_VERSION < 210000
+    return Attribute::get(C, Attribute::NoCapture); // JL_LLVM_VERSION below 210000: the NoCapture enum attribute
+#else
+    return Attribute::getWithCaptureInfo(C, CaptureInfo(CaptureComponents::None)); // otherwise: capture info with no captured components
+#endif
+}
+
+static inline void addNoCaptureAttr(AttrBuilder &param) // adds the "no capture" attribute to param, spelled per LLVM version
+{
+#if JL_LLVM_VERSION < 210000
+    param.addAttribute(Attribute::NoCapture); // JL_LLVM_VERSION below 210000: the NoCapture enum attribute
+#else
+    param.addCapturesAttr(CaptureInfo(CaptureComponents::None)); // otherwise: captures attribute with no captured components
+#endif
+}
+
 static Type *get_pjlvalue(LLVMContext &C) { return JuliaType::get_pjlvalue_ty(C); }
 
 static FunctionType *get_func_sig(LLVMContext &C) { return JuliaType::get_jlfunc_ty(C); }
@@ -617,7 +635,7 @@ static AttributeList get_func_attrs(LLVMContext &C)
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
             {AttributeSet(),
-             Attributes(C, {Attribute::NoAlias, Attribute::ReadOnly, Attribute::NoCapture, Attribute::NoUndef})});
+             Attributes(C, {Attribute::NoAlias, Attribute::ReadOnly, Attribute::NoUndef}, {NoCaptureAttr(C)})});
 }
 
 static AttributeList get_attrs_noreturn(LLVMContext &C)
@@ -996,7 +1014,7 @@ static const auto jllockvalue_func = new JuliaFunction<>{
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             AttributeSet(),
-            {Attributes(C, {Attribute::NoCapture})}); },
+            {Attributes(C, {}, {NoCaptureAttr(C)})}); },
 };
 static const auto jlunlockvalue_func = new JuliaFunction<>{
     XSTR(jl_unlock_value),
@@ -1005,7 +1023,7 @@ static const auto jlunlockvalue_func = new JuliaFunction<>{
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             AttributeSet(),
-            {Attributes(C, {Attribute::NoCapture})}); },
+            {Attributes(C, {}, {NoCaptureAttr(C)})}); },
 };
 static const auto jllockfield_func = new JuliaFunction<>{
     XSTR(jl_lock_field),
@@ -1014,7 +1032,7 @@ static const auto jllockfield_func = new JuliaFunction<>{
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             AttributeSet(),
-            {Attributes(C, {Attribute::NoCapture})}); },
+            {Attributes(C, {}, {NoCaptureAttr(C)})}); },
 };
 static const auto jlunlockfield_func = new JuliaFunction<>{
     XSTR(jl_unlock_field),
@@ -1023,7 +1041,7 @@ static const auto jlunlockfield_func = new JuliaFunction<>{
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             AttributeSet(),
-            {Attributes(C, {Attribute::NoCapture})}); },
+            {Attributes(C, {}, {NoCaptureAttr(C)})}); },
 };
 static const auto jlenter_func = new JuliaFunction<>{
     XSTR(jl_enter_handler),
@@ -1489,7 +1507,7 @@ static const auto gc_loaded_func = new JuliaFunction<>{
         RetAttrs.addAttribute(Attribute::NonNull);
         RetAttrs.addAttribute(Attribute::NoUndef);
         return AttributeList::get(C, AttributeSet::get(C,FnAttrs), AttributeSet::get(C,RetAttrs),
-                { Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone, Attribute::NoCapture}),
+                { Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}, {NoCaptureAttr(C)}),
                   Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}) });
                   },
 };
@@ -1684,6 +1702,15 @@ struct jl_aliasinfo_t {
     // memory region non-aliasing. It should be deleted once the TBAA metadata
     // is improved to encode only memory layout and *not* memory regions.
     static jl_aliasinfo_t fromTBAA(jl_codectx_t &ctx, MDNode *tbaa);
+
+    AAMDNodes toAAMDNodes() const // bundles this object's tbaa, tbaa_struct, scope and noalias nodes into one AAMDNodes
+    {
+#if JL_LLVM_VERSION < 220000
+        return AAMDNodes(tbaa, tbaa_struct, scope, noalias); // JL_LLVM_VERSION below 220000: four-argument constructor
+#else
+        return AAMDNodes(tbaa, tbaa_struct, scope, noalias, nullptr); // otherwise a fifth argument is required; nullptr is passed for it
+#endif
+    }
 };
 
 // metadata tracking for a llvm Value* during codegen
@@ -2681,7 +2708,11 @@ std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &conte
         m->addModuleFlag(llvm::Module::Warning, "Debug Info Version",
             llvm::DEBUG_METADATA_VERSION);
     m->setDataLayout(DL);
+#if JL_LLVM_VERSION < 210000
     m->setTargetTriple(triple.str());
+#else
+    m->setTargetTriple(triple);
+#endif
 
     if (triple.isOSWindows() && triple.getArch() == Triple::x86) {
         // tell Win32 to assume the stack is always 16-byte aligned,
@@ -7759,7 +7790,7 @@ const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value
         crt = (jl_value_t*)jl_any_type;
     }
     bool toboxed;
-    Type *lcrt = _julia_struct_to_llvm(&params, *params.tsctx.getContext(), crt, &toboxed);
+    Type *lcrt = _julia_struct_to_llvm(&params, params.getContext(), crt, &toboxed);
     if (toboxed)
         lcrt = JuliaType::get_prjlvalue_ty(lcrt->getContext());
     size_t nargs = jl_nparams(sigt)-1;
@@ -7937,7 +7968,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module
         AttrBuilder param(M->getContext());
         param.addStructRetAttr(srt);
         param.addAttribute(Attribute::NoAlias);
-        param.addAttribute(Attribute::NoCapture);
+        addNoCaptureAttr(param);
         param.addAttribute(Attribute::NoUndef);
         attrs.push_back(AttributeSet::get(M->getContext(), param));
         assert(fsig.size() == 1);
@@ -7945,7 +7976,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module
     if (props.cc == jl_returninfo_t::Union) {
         AttrBuilder param(M->getContext());
         param.addAttribute(Attribute::NoAlias);
-        param.addAttribute(Attribute::NoCapture);
+        addNoCaptureAttr(param);
         param.addAttribute(Attribute::NoUndef);
         attrs.push_back(AttributeSet::get(M->getContext(), param));
         assert(fsig.size() == 1);
@@ -7954,7 +7985,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module
     if (props.return_roots) {
         AttrBuilder param(M->getContext());
         param.addAttribute(Attribute::NoAlias);
-        param.addAttribute(Attribute::NoCapture);
+        addNoCaptureAttr(param);
         param.addAttribute(Attribute::NoUndef);
         attrs.push_back(AttributeSet::get(M->getContext(), param));
         fsig.push_back(getPointerTy(M->getContext()));
@@ -7988,7 +8019,7 @@ static jl_returninfo_t get_specsig_function(jl_codegen_params_t &params, Module
         AttrBuilder param(M->getContext());
         Type *ty = et;
         if (et == nullptr || et->isAggregateType()) { // aggregate types are passed by pointer
-            param.addAttribute(Attribute::NoCapture);
+            addNoCaptureAttr(param);
             param.addAttribute(Attribute::ReadOnly);
             ty = PointerType::get(M->getContext(), AddressSpace::Derived);
         }
@@ -8119,7 +8150,7 @@ static jl_llvm_functions_t
     size_t min_world = src->min_world;
     size_t max_world = src->max_world;
     jl_llvm_functions_t declarations;
-    jl_codectx_t ctx(*params.tsctx.getContext(), params, min_world, max_world);
+    jl_codectx_t ctx(params.getContext(), params, min_world, max_world);
     jl_datatype_t *vatyp = NULL;
     JL_GC_PUSH2(&ctx.code, &vatyp);
     ctx.code = src->code;
@@ -9884,7 +9915,9 @@ void linkFunctionBody(Function &Dst, Function &Src)
         Dst.setPersonalityFn(Src.getPersonalityFn());
     if (Src.hasPersonalityFn())
         Dst.setPersonalityFn(Src.getPersonalityFn());
+#if JL_LLVM_VERSION < 210000
     assert(Src.IsNewDbgInfoFormat == Dst.IsNewDbgInfoFormat);
+#endif
 
     // Copy over the metadata attachments without remapping.
     Dst.copyMetadata(&Src, 0);
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index 752dc505092fa..98044f763745c 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -505,8 +505,13 @@ static int lookup_pointer(
         else {
             int havelock = jl_lock_profile_wr();
             assert(havelock); (void)havelock;
-            info = context->getLineInfoForAddress(makeAddress(Section, pointer + slide), infoSpec);
+            auto lineinfo = context->getLineInfoForAddress(makeAddress(Section, pointer + slide), infoSpec);
             jl_unlock_profile_wr();
+#if JL_LLVM_VERSION < 210000
+            info = std::move(lineinfo);
+#else
+            info = std::move(lineinfo.value());
+#endif
         }
 
         jl_frame_t *frame = &(*frames)[i];
diff --git a/src/disasm.cpp b/src/disasm.cpp
index a80f79218f509..2daa8bc2bcbb0 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -506,37 +506,38 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada
         auto TSM = std::unique_ptr<orc::ThreadSafeModule>(unwrap(dump->TSM));
         //If TSM is not passed in, then the context MUST be locked externally.
         //RAII will release the lock
-        std::optional<orc::ThreadSafeContext::Lock> lock;
-        if (TSM) {
-            lock.emplace(TSM->getContext().getLock());
-        }
-        Function *llvmf = cast<Function>(unwrap(dump->F));
-        if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent()))
-            jl_error("jl_dump_function_ir: Expected Function* in a temporary Module");
-
-        LineNumberAnnotatedWriter AAW{"; ", false, debuginfo};
-        if (!llvmf->getParent()) {
-            // print the function declaration as-is
-            llvmf->print(stream, &AAW);
-            delete llvmf;
-        }
-        else {
-            assert(TSM && TSM->getModuleUnlocked() == llvmf->getParent() && "Passed module was not the same as function parent!");
-            auto m = TSM->getModuleUnlocked();
-            if (strip_ir_metadata) {
-                std::string llvmfn(llvmf->getName());
-                jl_strip_llvm_addrspaces(m);
-                jl_strip_llvm_debug(m, true, &AAW);
-                // rewriting the function type creates a new function, so look it up again
-                llvmf = m->getFunction(llvmfn);
-            }
-            if (dump_module) {
-                m->print(stream, &AAW);
+        orc::ThreadSafeContext TSCtx;
+        if (TSM)
+            TSCtx = TSM->getContext();
+        withContextDo(TSCtx, [&] (LLVMContext*) {
+            Function *llvmf = cast<Function>(unwrap(dump->F));
+            if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent()))
+                jl_error("jl_dump_function_ir: Expected Function* in a temporary Module");
+
+            LineNumberAnnotatedWriter AAW{"; ", false, debuginfo};
+            if (!llvmf->getParent()) {
+                // print the function declaration as-is
+                llvmf->print(stream, &AAW);
+                delete llvmf;
             }
             else {
-                llvmf->print(stream, &AAW);
+                assert(TSM && TSM->getModuleUnlocked() == llvmf->getParent() && "Passed module was not the same as function parent!");
+                auto m = TSM->getModuleUnlocked();
+                if (strip_ir_metadata) {
+                    std::string llvmfn(llvmf->getName());
+                    jl_strip_llvm_addrspaces(m);
+                    jl_strip_llvm_debug(m, true, &AAW);
+                    // rewriting the function type creates a new function, so look it up again
+                    llvmf = m->getFunction(llvmfn);
+                }
+                if (dump_module) {
+                    m->print(stream, &AAW);
+                }
+                else {
+                    llvmf->print(stream, &AAW);
+                }
             }
-        }
+        });
     }
 
     return jl_pchar_to_string(stream.str().data(), stream.str().size());
@@ -924,7 +925,11 @@ static void jl_dump_asm_internal(
     // LLVM will destroy the formatted stream, and we keep the raw stream.
     std::unique_ptr<formatted_raw_ostream> ustream(new formatted_raw_ostream(rstream));
     std::unique_ptr<MCStreamer> Streamer(
-#if JL_LLVM_VERSION >= 190000
+#if JL_LLVM_VERSION >= 210000
+        TheTarget->createAsmStreamer(Ctx, std::move(ustream),
+
+                                     std::move(IP), std::move(CE), std::move(MAB))
+#elif JL_LLVM_VERSION >= 190000
         TheTarget->createAsmStreamer(Ctx, std::move(ustream),
 
                                      IP.release(), std::move(CE), std::move(MAB))
@@ -1268,8 +1273,8 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
                 OutputAsmDialect = 0;
             if (!strcmp(asm_variant, "intel"))
                 OutputAsmDialect = 1;
-            MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
-                jl_ExecutionEngine->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
+            std::unique_ptr<MCInstPrinter> InstPrinter(TM->getTarget().createMCInstPrinter(
+                                                           jl_ExecutionEngine->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI));
             std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
                 STI, MRI, Options));
             std::unique_ptr<MCCodeEmitter> MCE;
@@ -1278,8 +1283,10 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
             }
             auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
             std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
-#if JL_LLVM_VERSION >= 190000
-                *Context, std::move(FOut), InstPrinter, std::move(MCE), std::move(MAB)
+#if JL_LLVM_VERSION >= 210000
+                *Context, std::move(FOut), std::move(InstPrinter), std::move(MCE), std::move(MAB)
+#elif JL_LLVM_VERSION >= 190000
+                *Context, std::move(FOut), InstPrinter.release(), std::move(MCE), std::move(MAB)
 #else
                 *Context, std::move(FOut), true, true, InstPrinter, std::move(MCE),
                 std::move(MAB), false
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 0773d1a6c16a1..af10521902790 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -14,6 +14,9 @@
 #include <llvm/ExecutionEngine/Orc/CompileUtils.h>
 #include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
 #include <llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h>
+#if JL_LLVM_VERSION >= 210000
+#  include <llvm/ExecutionEngine/Orc/SelfExecutorProcessControl.h>
+#endif
 #include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h>
 #if JL_LLVM_VERSION >= 200000
 #include <llvm/ExecutionEngine/Orc/AbsoluteSymbols.h>
@@ -299,8 +302,7 @@ void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi,
 
     orc::ThreadSafeModule result_m;
     std::string gf_thunk_name;
-    {
-        jl_codegen_params_t params(std::make_unique<LLVMContext>(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context
+    withCodegenParamsDo(std::make_unique<LLVMContext>(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple(), [&] (jl_codegen_params_t &params) {
         params.getContext().setDiscardValueNames(true);
         params.cache = true;
         params.imaging_mode = 0;
@@ -320,7 +322,7 @@ void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi,
         SmallVector<orc::ThreadSafeModule,0> sharedmodules;
         finish_params(M, params, sharedmodules);
         assert(sharedmodules.empty());
-    }
+    });
     int8_t gc_state = jl_gc_safe_enter(ct->ptls);
     jl_ExecutionEngine->addModule(std::move(result_m));
     uintptr_t Addr = jl_ExecutionEngine->getFunctionAddress(gf_thunk_name);
@@ -599,11 +601,12 @@ static void prepare_compile(jl_code_instance_t *codeinst) JL_NOTSAFEPOINT_LEAVE
             assert(waiting == std::get<1>(it->second));
             std::get<1>(it->second) = 0;
             auto &params = std::get<0>(it->second);
-            params.tsctx_lock = params.tsctx.getLock();
-            waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint
-            assert(!waiting); (void)waiting;
-            Module *M = emittedmodules[codeinst].getModuleUnlocked();
-            finish_params(M, params, sharedmodules);
+            params.withContextDo([&] (LLVMContext*) JL_NOTSAFEPOINT {
+                waiting = jl_analyze_workqueue(codeinst, params, true); // may safepoint
+                assert(!waiting); (void)waiting;
+                Module *M = emittedmodules[codeinst].getModuleUnlocked();
+                finish_params(M, params, sharedmodules);
+            });
             incompletemodules.erase(it);
         }
         // and then indicate this should be compiled now
@@ -630,13 +633,14 @@ static void complete_emit(jl_code_instance_t *edge) JL_NOTSAFEPOINT_LEAVE JL_NOT
         assert(it != incompletemodules.end());
         if (--std::get<1>(it->second) == 0) {
             auto &params = std::get<0>(it->second);
-            params.tsctx_lock = params.tsctx.getLock();
-            assert(callee == it->first);
-            orc::ThreadSafeModule &M = emittedmodules[callee];
-            emit_always_inline(M, params); // may safepoint
-            int waiting = jl_analyze_workqueue(callee, params); // may safepoint
-            assert(!waiting); (void)waiting;
-            finish_params(M.getModuleUnlocked(), params, sharedmodules);
+            params.withContextDo([&] (LLVMContext*) {
+                assert(callee == it->first);
+                orc::ThreadSafeModule &M = emittedmodules[callee];
+                emit_always_inline(M, params); // may safepoint
+                int waiting = jl_analyze_workqueue(callee, params); // may safepoint
+                assert(!waiting); (void)waiting;
+                finish_params(M.getModuleUnlocked(), params, sharedmodules);
+            });
             incompletemodules.erase(it);
         }
     }
@@ -656,10 +660,9 @@ static void jl_compile_codeinst_now(jl_code_instance_t *codeinst)
         if (!sharedmodules.empty()) {
             auto TSM = sharedmodules.pop_back_val();
             lock.native.unlock();
-            {
-                auto Lock = TSM.getContext().getLock();
+            withContextDo(TSM.getContext(), [&] (LLVMContext*) {
                 jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint
-            }
+            });
             jl_ExecutionEngine->addModule(std::move(TSM));
             lock.native.lock();
         }
@@ -675,10 +678,9 @@ static void jl_compile_codeinst_now(jl_code_instance_t *codeinst)
             emittedmodules.erase(TSMref);
             lock.native.unlock();
             uint64_t start_time = jl_hrtime();
-            {
-                auto Lock = TSM.getContext().getLock();
+            withContextDo(TSM.getContext(), [&] (LLVMContext*) {
                 jl_ExecutionEngine->optimizeDLSyms(*TSM.getModuleUnlocked()); // may safepoint
-            }
+            });
             jl_ExecutionEngine->addModule(std::move(TSM)); // may safepoint
             // If logging of the compilation stream is enabled,
             // then dump the method-instance specialization type to the stream
@@ -809,26 +811,31 @@ void jl_emit_codeinst_to_jit_impl(
     }
     JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE);
     // emit the code in LLVM IR form to the new context
-    jl_codegen_params_t params(std::make_unique<LLVMContext>(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context
-    params.getContext().setDiscardValueNames(true);
-    params.cache = true;
-    params.imaging_mode = 0;
-    orc::ThreadSafeModule result_m =
-        jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), params.tsctx, params.DL, params.TargetTriple);
-    params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
-    JL_GC_PUSH1(&params.temporary_roots);
-    jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints
-    if (!result_m) {
+    orc::ThreadSafeModule result_m;
+    jl_llvm_functions_t decls;
+    jl_codegen_params_t params(std::make_unique<LLVMContext>(), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple(), std::defer_lock);
+    auto exit = params.withContextDo([&] (LLVMContext *ctx) {
+        ctx->setDiscardValueNames(true);
+        params.cache = true;
+        params.imaging_mode = 0;
+        result_m =
+            jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)), params.tsctx, params.DL, params.TargetTriple);
+        params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
+        JL_GC_PUSH1(&params.temporary_roots);
+        decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints
+        if (!result_m) {
+            JL_GC_POP();
+            return true;
+        }
+        jl_optimize_roots(params, jl_get_ci_mi(codeinst), *result_m.getModuleUnlocked()); // contains safepoints
+        params.temporary_roots = nullptr;
+        params.temporary_roots_set.clear();
         JL_GC_POP();
+        return false;
+    });
+    if (exit)
         return;
-    }
-    jl_optimize_roots(params, jl_get_ci_mi(codeinst), *result_m.getModuleUnlocked()); // contains safepoints
-    params.temporary_roots = nullptr;
-    params.temporary_roots_set.clear();
-    JL_GC_POP();
-    { // drop lock before acquiring engine_lock
-        auto release = std::move(params.tsctx_lock);
-    }
+    // drop codegen params lock before acquiring engine_lock
     jl_unique_gcsafe_lock lock(engine_lock);
     if (invokenames.count(codeinst) || jl_is_compiled_codeinst(codeinst))
         return; // destroy everything
@@ -852,16 +859,17 @@ void jl_emit_codeinst_to_jit_impl(
     jl_atomic_cmpswap_relaxed(&codeinst->invoke, &expected, jl_fptr_wait_for_compiled_addr);
     invokenames[codeinst] = std::move(decls);
     complete_emit(codeinst);
-    params.tsctx_lock = params.tsctx.getLock(); // re-acquire lock
-    emit_always_inline(result_m, params);
-    int waiting = jl_analyze_workqueue(codeinst, params);
-    if (waiting) {
-        auto release = std::move(params.tsctx_lock); // unlock again before moving from it
-        incompletemodules.try_emplace(codeinst, std::move(params), waiting);
-    }
-    else {
-        finish_params(result_m.getModuleUnlocked(), params, sharedmodules);
-    }
+    params.withContextDo([&] (LLVMContext *ctx) {
+        // re-acquire lock
+        emit_always_inline(result_m, params);
+        int waiting = jl_analyze_workqueue(codeinst, params);
+        if (waiting) {
+            incompletemodules.try_emplace(codeinst, std::move(params), waiting);
+        }
+        else {
+            finish_params(result_m.getModuleUnlocked(), params, sharedmodules);
+        }
+    });
     emittedmodules[codeinst] = std::move(result_m);
 }
 
@@ -1223,6 +1231,8 @@ std::unique_ptr<jitlink::JITLinkMemoryManager> createJITLinkMemoryManager() JL_N
 #endif
 }
 
+#if defined(JL_USE_JITLINK) && defined(LLVM_SHLIB)
+#  if JL_LLVM_VERSION < 210000
 class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar {
 public:
     Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) override {
@@ -1235,6 +1245,72 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar {
         return Error::success();
     }
 };
+#  else
+class JLEHFrameRegistrationPlugin final : public LinkGraphLinkingLayer::Plugin { // LLVM 21+ plugin routing EH-frame (de)registration through Julia's register_eh_frames/deregister_eh_frames
+    static Error registerEHFrameWrapper(orc::ExecutorAddrRange EHFrame) { // forwards the section's start pointer and byte size; always reports success
+        register_eh_frames(EHFrame.Start.toPtr<uint8_t *>(), static_cast<size_t>(EHFrame.size()));
+        return Error::success();
+    }
+    // Counterpart of registerEHFrameWrapper: hands the same (pointer, size) pair to deregister_eh_frames.
+    static Error deregisterEHFrameWrapper(orc::ExecutorAddrRange EHFrame) {
+        deregister_eh_frames(EHFrame.Start.toPtr<uint8_t *>(), static_cast<size_t>(EHFrame.size()));
+        return Error::success();
+    }
+    // C-ABI entry point: WrapperFunction::handle decodes the SPSExecutorAddrRange argument buffer and calls registerEHFrameWrapper.
+    static orc::shared::CWrapperFunctionResult
+    registerEHFrameSectionAllocAction(const char *ArgData, size_t ArgSize) {
+        using namespace llvm::orc::shared;
+        return WrapperFunction<SPSError(SPSExecutorAddrRange)>::handle(
+            ArgData, ArgSize, registerEHFrameWrapper)
+            .release();
+    }
+    // C-ABI entry point mirroring the one above, but dispatching to deregisterEHFrameWrapper.
+    static orc::shared::CWrapperFunctionResult
+    deregisterEHFrameSectionAllocAction(const char *ArgData, size_t ArgSize) {
+        using namespace llvm::orc::shared;
+        return WrapperFunction<SPSError(SPSExecutorAddrRange)>::handle(
+            ArgData, ArgSize, deregisterEHFrameWrapper)
+            .release();
+    }
+    // Link-graph pass: when G has an EH-frame section, appends a (register, deregister) call pair covering its range to G.allocActions().
+    static Error postFixup(jitlink::LinkGraph &G)
+    {
+        using namespace llvm::orc::shared;
+        auto registerFrame = ExecutorAddr::fromPtr(registerEHFrameSectionAllocAction);
+        auto deregisterFrame = ExecutorAddr::fromPtr(deregisterEHFrameSectionAllocAction);
+        if (auto *EHFrame = jitlink::getEHFrameSection(G)) { // graphs without an EH-frame section are left untouched
+            auto R = jitlink::SectionRange(*EHFrame).getRange();
+            G.allocActions().push_back(
+                {cantFail(
+                        WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddrRange>>(
+                            registerFrame, R)),
+                 cantFail(
+                     WrapperFunctionCall::Create<SPSArgList<SPSExecutorAddrRange>>(
+                         deregisterFrame, R))});
+        }
+        return Error::success();
+    }
+
+public:
+    JLEHFrameRegistrationPlugin() {}
+    // Installs postFixup as an additional post-fixup pass for each link graph.
+    void modifyPassConfig(MaterializationResponsibility&,
+                          jitlink::LinkGraph&,
+                          jitlink::PassConfiguration &PassConfig) override
+    {
+        PassConfig.PostFixupPasses.push_back(postFixup);
+    }
+    Error notifyFailed(MaterializationResponsibility&) override { // nothing to undo here
+        return Error::success();
+    }
+    Error notifyRemovingResources(JITDylib&, ResourceKey) override { // the plugin keeps no per-resource state
+        return Error::success();
+    }
+    void notifyTransferringResources(JITDylib&, ResourceKey,
+                                     ResourceKey) override {}
+};
+#  endif
+#endif
 
 RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT;
 
@@ -1401,7 +1477,12 @@ namespace {
         }
         auto optlevel = CodeGenOptLevelFor(jl_options.opt_level);
         auto TM = TheTarget->createTargetMachine(
-                TheTriple.getTriple(), TheCPU, FeaturesStr,
+#if JL_LLVM_VERSION < 210000
+                TheTriple.getTriple(),
+#else
+                TheTriple,
+#endif
+                TheCPU, FeaturesStr,
                 options,
                 relocmodel,
                 codemodel,
@@ -1931,7 +2012,8 @@ JuliaOJIT::JuliaOJIT()
     MemMgr(createRTDyldMemoryManager()),
     UnlockedObjectLayer(
             ES,
-            [this]() {
+            [this](auto&&...) {
+                // LLVM 21+ passes in a memory buffer
                 std::unique_ptr<RuntimeDyld::MemoryManager> result(new ForwardingMemoryManager(MemMgr));
                 return result;
             }
@@ -1946,15 +2028,26 @@ JuliaOJIT::JuliaOJIT()
     OptSelLayer(ES, OptimizeLayer, static_cast<orc::ThreadSafeModule (*)(orc::ThreadSafeModule, orc::MaterializationResponsibility&)>(selectOptLevel))
 {
 #ifdef JL_USE_JITLINK
-# if defined(LLVM_SHLIB)
+# if JL_LLVM_VERSION < 210000
+#  if defined(LLVM_SHLIB)
     // When dynamically linking against LLVM, use our custom EH frame registration code
     // also used with RTDyld to inform both our and the libc copy of libunwind.
     auto ehRegistrar = std::make_unique<JLEHFrameRegistrar>();
-# else
+#  else
     auto ehRegistrar = std::make_unique<jitlink::InProcessEHFrameRegistrar>();
-# endif
+#  endif
     ObjectLayer.addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
         ES, std::move(ehRegistrar)));
+#else
+    // llvm's EHFrameRegistrationPlugin does not seem to have any customization
+    // hooks in 21+. Do our own registration with a separate plugin instead.
+#  if defined(LLVM_SHLIB)
+    // When dynamically linking against LLVM, use our custom EH frame registration code
+    // also used with RTDyld to inform both our and the libc copy of libunwind.
+    ObjectLayer.addPlugin(std::make_unique<JLEHFrameRegistrationPlugin>());
+#  endif
+    ObjectLayer.addPlugin(cantFail(EHFrameRegistrationPlugin::Create(ES))); // cantFail checks the Expected; a bare .get() on an unchecked Expected aborts in LLVM builds with ABI-breaking checks
+#endif
 
     ObjectLayer.addPlugin(std::make_unique<JLDebuginfoPlugin>());
     ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(&jit_bytes_size));
@@ -2105,27 +2198,28 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
     TSM = selectOptLevel(std::move(TSM));
     TSM = (*Optimizers)(std::move(TSM));
     TSM = (*JITPointers)(std::move(TSM));
-    auto Lock = TSM.getContext().getLock();
-    Module &M = *TSM.getModuleUnlocked();
-
-    for (auto &f : M) {
-        if (!f.isDeclaration()){
-            jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str());
+    auto Obj = withContextDo(TSM.getContext(), [&] (LLVMContext*) {
+        Module &M = *TSM.getModuleUnlocked();
+        for (auto &f : M) {
+            if (!f.isDeclaration()){
+                jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, f.getName().str().c_str());
+            }
         }
-    }
 
-    // Treat this as if one of the passes might contain a safepoint
-    // even though that shouldn't be the case and might be unwise
-    Expected<std::unique_ptr<MemoryBuffer>> Obj = CompileLayer.getCompiler()(M);
-    if (!Obj) {
+        // Treat this as if one of the passes might contain a safepoint
+        // even though that shouldn't be the case and might be unwise
+        auto Obj = CompileLayer.getCompiler()(M);
+        if (!Obj) {
 #ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint
-        ES.reportError(Obj.takeError());
+            ES.reportError(Obj.takeError());
 #endif
-        errs() << "Failed to add module to JIT!\n";
-        errs() << "Dumping failing module\n" << M << "\n";
+            errs() << "Failed to add module to JIT!\n";
+            errs() << "Dumping failing module\n" << M << "\n";
+        }
+        return Obj;
+    });
+    if (!Obj)
         return;
-    }
-    { auto release = std::move(Lock); }
     auto Err = JuliaOJIT::addObjectFile(JD, std::move(*Obj));
     if (Err) {
 #ifndef __clang_analyzer__ // reportError calls an arbitrary function, which the static analyzer thinks might be a safepoint
@@ -2387,7 +2481,11 @@ std::unique_ptr<TargetMachine> JuliaOJIT::cloneTargetMachine() const
 {
     auto NewTM = std::unique_ptr<TargetMachine>(getTarget()
         .createTargetMachine(
+#if JL_LLVM_VERSION < 210000
             getTargetTriple().str(),
+#else
+            getTargetTriple(),
+#endif
             getTargetCPU(),
             getTargetFeatureString(),
             getTargetOptions(),
diff --git a/src/jitlayers.h b/src/jitlayers.h
index 5c1fb10a84205..abf97e9e0e89d 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -1,5 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallSet.h"
 #include <llvm/ADT/MapVector.h>
 #include <llvm/ADT/StringSet.h>
@@ -246,15 +247,32 @@ struct cfunc_decl_t {
 
 typedef SmallVector<std::pair<jl_code_instance_t*, jl_codegen_call_target_t>, 0> jl_workqueue_t;
 
+template<typename CB> auto withContextDo(orc::ThreadSafeContext &tsctx, CB &&cb) // runs cb(LLVMContext*) while tsctx is locked and returns cb's result
+{
+#if JL_LLVM_VERSION < 210000
+    if (!tsctx.getContext()) return cb(static_cast<LLVMContext*>(nullptr)); // empty context: getLock() would assert, so pass nullptr as LLVM 21+ does
+    auto lock = tsctx.getLock();
+    return cb(tsctx.getContext());
+#else
+    return tsctx.withContextDo(std::forward<CB>(cb));
+#endif
+}
+
+// Overload for a temporary ThreadSafeContext: hands it to the lvalue overload.
+template<typename CB> auto withContextDo(orc::ThreadSafeContext &&tsctx, CB &&cb)
+{
+    return withContextDo(static_cast<orc::ThreadSafeContext&>(tsctx), std::forward<CB>(cb));
+}
+
 typedef std::list<std::tuple<std::string, std::string, unsigned int>> CallFrames;
 struct jl_codegen_params_t {
     orc::ThreadSafeContext tsctx;
-    orc::ThreadSafeContext::Lock tsctx_lock;
+    LLVMContext *_ctx{nullptr};
     DataLayout DL;
     Triple TargetTriple;
 
     inline LLVMContext &getContext() JL_NOTSAFEPOINT {
-        return *tsctx.getContext();
+        return *_ctx;
     }
     typedef StringMap<GlobalVariable*> SymMapGV;
     // outputs
@@ -293,9 +311,9 @@ struct jl_codegen_params_t {
     bool imaging_mode;
     bool safepoint_on_entry = true;
     bool use_swiftcc = true;
-    jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) JL_NOTSAFEPOINT  JL_NOTSAFEPOINT_ENTER
+    jl_codegen_params_t(orc::ThreadSafeContext ctx,
+                        DataLayout DL, Triple triple, std::defer_lock_t) JL_NOTSAFEPOINT  JL_NOTSAFEPOINT_ENTER
       : tsctx(std::move(ctx)),
-        tsctx_lock(tsctx.getLock()),
         DL(std::move(DL)),
         TargetTriple(std::move(triple)),
         imaging_mode(1)
@@ -306,8 +324,28 @@ struct jl_codegen_params_t {
     }
     jl_codegen_params_t(jl_codegen_params_t &&) JL_NOTSAFEPOINT = default;
     ~jl_codegen_params_t() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE = default;
+
+    template<typename CB>
+    auto withContextDo(CB &&cb) {
+        return ::withContextDo(tsctx, [&] (LLVMContext *locked) {
+            this->_ctx = locked; // getContext() reads this member while cb runs
+            auto reset = make_scope_exit([this] { _ctx = nullptr; });
+            return cb(locked);
+        });
+    }
 };
 
+// Builds a jl_codegen_params_t (constructed with std::defer_lock) and runs
+// cb(params) inside params.withContextDo, returning whatever cb returns.
+template<typename CB>
+auto withCodegenParamsDo(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple, CB &&cb)
+{
+    jl_codegen_params_t codegen_params(std::move(ctx), std::move(DL), std::move(triple),
+                                       std::defer_lock);
+    auto invoke_cb = [&] (LLVMContext*) { return cb(codegen_params); };
+    return codegen_params.withContextDo(invoke_cb);
+}
+
 const char *jl_generate_ccallable(Module *llvmmod, jl_value_t *nameval, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params);
 
 jl_llvm_functions_t jl_emit_code(
@@ -689,8 +727,9 @@ class JuliaOJIT {
 extern JuliaOJIT *jl_ExecutionEngine;
 std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT;
 inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, const DataLayout &DL, const Triple &triple) JL_NOTSAFEPOINT {
-    auto lock = ctx.getLock();
-    return orc::ThreadSafeModule(jl_create_llvm_module(name, *ctx.getContext(), DL, triple), ctx);
+    return withContextDo(ctx, [&] (LLVMContext *_ctx) JL_NOTSAFEPOINT {
+        return orc::ThreadSafeModule(jl_create_llvm_module(name, *_ctx, DL, triple), ctx);
+    });
 }
 
 Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT {