@@ -19,6 +19,14 @@ function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level=
1919 end
2020 run!(pb, mod, tm)
2121 end
22+
23+ # Make sure any lingering TLS getters are rewritten even if upstream LLVM passes
24+ # transformed them before the GPULowerPTLSPass had a chance to run.
25+ if occursin(" StaticCompilerTarget" , string(typeof(job. config. target))) &&
26+ uses_julia_runtime(job)
27+ lower_ptls!(mod)
28+ end
29+
2230 optimize_module!(job, mod)
2331 run!(DeadArgumentEliminationPass(), mod, tm)
2432 return
@@ -405,7 +413,31 @@ function lower_ptls!(mod::LLVM.Module)
405413
406414 intrinsic = " julia.get_pgcstack"
407415
408- if haskey(functions(mod), intrinsic)
416+ # On host-style static targets we want a relocatable call into libjulia instead of
417+ # embedding the pointer to the TLS getter. Replace the intrinsic with a declared
418+ # libjulia call to avoid baking absolute addresses that crash in standalone binaries.
419+ if haskey(functions(mod), intrinsic) &&
420+ occursin(" StaticCompilerTarget" , string(typeof(job. config. target))) &&
421+ uses_julia_runtime(job)
422+
423+ pgc_fn = functions(mod)[intrinsic]
424+ jl_decl = if haskey(functions(mod), " jl_get_pgcstack" )
425+ functions(mod)[" jl_get_pgcstack" ]
426+ else
427+ LLVM. Function(mod, " jl_get_pgcstack" , LLVM. FunctionType(LLVM. PointerType()))
428+ end
429+
430+ for use in uses(pgc_fn)
431+ call = user(use):: LLVM.CallInst
432+ @dispose builder= IRBuilder() begin
433+ position!(builder, call)
434+ repl = call!(builder, function_type(jl_decl), jl_decl, LLVM. Value[])
435+ replace_uses!(call, repl)
436+ end
437+ erase!(call)
438+ changed = true
439+ end
440+ elseif haskey(functions(mod), intrinsic)
409441 ptls_getter = functions(mod)[intrinsic]
410442
411443 for use in uses(ptls_getter)
@@ -419,6 +451,34 @@ function lower_ptls!(mod::LLVM.Module)
419451 end
420452 end
421453
454+ # Newer Julia versions sometimes lower the TLS getter to an inttoptr call that bakes
455+ # the address of `jl_get_pgcstack_static` into the IR. Rewrite those calls as well to
456+ # make sure we always end up with a relocatable reference into libjulia when the
457+ # runtime is linked.
458+ if uses_julia_runtime(job) && occursin(" StaticCompilerTarget" , string(typeof(job. config. target)))
459+ jl_decl = if haskey(functions(mod), " jl_get_pgcstack" )
460+ functions(mod)[" jl_get_pgcstack" ]
461+ else
462+ LLVM. Function(mod, " jl_get_pgcstack" , LLVM. FunctionType(LLVM. PointerType()))
463+ end
464+
465+ for f in functions(mod), bb in blocks(f), inst in instructions(bb)
466+ inst isa LLVM. CallInst || continue
467+
468+ callee = LLVM. called_operand(inst)
469+ if callee isa LLVM. ConstantExpr && occursin(" inttoptr" , string(callee)) &&
470+ occursin(" pgcstack" , string(inst))
471+ @dispose builder= IRBuilder() begin
472+ position!(builder, inst)
473+ repl = call!(builder, function_type(jl_decl), jl_decl, LLVM. Value[])
474+ replace_uses!(inst, repl)
475+ end
476+ erase!(inst)
477+ changed = true
478+ end
479+ end
480+ end
481+
422482 return changed
423483end
# Zero-argument constructor for the PTLS-lowering pass: wraps `lower_ptls!` in a
# `NewPMModulePass` under the name given by the string literal below.
# NOTE(review): presumably this is what lets the lowering be scheduled in a pass
# pipeline as a module-level pass — confirm against LLVM.jl's `NewPMModulePass`.
424484GPULowerPTLSPass() = NewPMModulePass(" GPULowerPTLS" , lower_ptls!)
0 commit comments