@@ -15,6 +15,11 @@ function irgen(@nospecialize(job::CompilerJob), method_instance::Core.MethodInst
15
15
if Sys. iswindows ()
16
16
personality! (llvmf, nothing )
17
17
end
18
+
19
+ # remove the non-specialized jfptr functions
20
+ if startswith (LLVM. name (llvmf), " jfptr_" )
21
+ unsafe_delete! (mod, llvmf)
22
+ end
18
23
end
19
24
20
25
# remove the exception-handling personality function
@@ -68,6 +73,9 @@ function irgen(@nospecialize(job::CompilerJob), method_instance::Core.MethodInst
68
73
end
69
74
internalize! (pm, exports)
70
75
76
+ # inline llvmcall bodies
77
+ always_inliner! (pm)
78
+
71
79
can_throw (job) || add! (pm, ModulePass (" LowerThrow" , lower_throw!))
72
80
73
81
add_lowering_passes! (job, pm)
@@ -199,7 +207,7 @@ function lower_throw!(mod::LLVM.Module)
199
207
end
200
208
201
209
# remove the call
202
- call_args = collect ( operands (call) )[1 : end - 1 ] # last arg is function itself
210
+ call_args = operands (call)[1 : end - 1 ] # last arg is function itself
203
211
unsafe_delete! (LLVM. parent (call), call)
204
212
205
213
# HACK: kill the exceptions' unused arguments
@@ -377,90 +385,41 @@ end
377
385
# some back-ends don't support byval, or support it badly
378
386
# https://reviews.llvm.org/D79744
379
387
380
- # generate a kernel wrapper to fix & improve argument passing
381
- function lower_byval (@nospecialize (job:: CompilerJob ), mod:: LLVM.Module , entry_f :: LLVM.Function )
388
+ # modify the kernel function to fix & improve argument passing
389
+ function lower_byval (@nospecialize (job:: CompilerJob ), mod:: LLVM.Module , f :: LLVM.Function )
382
390
ctx = context (mod)
383
- entry_ft = eltype (llvmtype (entry_f):: LLVM.PointerType ):: LLVM.FunctionType
384
- @compiler_assert return_type (entry_ft) == LLVM. VoidType (ctx) job
385
-
386
- args = classify_arguments (job, entry_f)
387
- filter! (args) do arg
388
- arg. cc != GHOST
389
- end
390
-
391
- # generate the wrapper function type & definition
392
- wrapper_types = LLVM. LLVMType[]
393
- for arg in args
394
- typ = if arg. cc == BITS_REF
395
- eltype (arg. codegen. typ)
396
- else
397
- convert (LLVMType, arg. typ; ctx)
391
+ ft = eltype (llvmtype (f):: LLVM.PointerType ):: LLVM.FunctionType
392
+ @compiler_assert return_type (ft) == LLVM. VoidType (ctx) job
393
+
394
+ # find the byval parameters
395
+ byval = BitVector (undef, length (parameters (ft)))
396
+ if LLVM. version () >= v " 12"
397
+ for i in 1 : length (byval)
398
+ attrs = collect (parameter_attributes (f, i))
399
+ byval[i] = any (attrs) do attr
400
+ kind (attr) == kind (EnumAttribute (" byval" , 0 ; ctx))
401
+ end
402
+ end
403
+ else
404
+ # XXX : byval is not round-trippable on LLVM < 12 (see maleadt/LLVM.jl#186)
405
+ args = classify_arguments (job, f)
406
+ filter! (args) do arg
407
+ arg. cc != GHOST
398
408
end
399
- push! (wrapper_types, typ)
400
- end
401
- wrapper_fn = LLVM. name (entry_f)
402
- LLVM. name! (entry_f, wrapper_fn * " .inner" )
403
- wrapper_ft = LLVM. FunctionType (LLVM. VoidType (ctx), wrapper_types)
404
- wrapper_f = LLVM. Function (mod, wrapper_fn, wrapper_ft)
405
-
406
- # emit IR performing the "conversions"
407
- let builder = Builder (ctx)
408
- entry = BasicBlock (wrapper_f, " entry" ; ctx)
409
- position! (builder, entry)
410
-
411
- wrapper_args = Vector {LLVM.Value} ()
412
-
413
- # perform argument conversions
414
409
for arg in args
415
410
if arg. cc == BITS_REF
416
- # copy the argument value to a stack slot, and reference it.
417
- ptr = alloca! (builder, eltype (arg. codegen. typ))
418
- if LLVM. addrspace (arg. codegen. typ) != 0
419
- ptr = addrspacecast! (builder, ptr, arg. codegen. typ)
420
- end
421
- store! (builder, parameters (wrapper_f)[arg. codegen. i], ptr)
422
- push! (wrapper_args, ptr)
423
- else
424
- push! (wrapper_args, parameters (wrapper_f)[arg. codegen. i])
425
- for attr in collect (parameter_attributes (entry_f, arg. codegen. i))
426
- push! (parameter_attributes (wrapper_f, arg. codegen. i), attr)
427
- end
411
+ byval[arg. codegen. i] = true
428
412
end
429
413
end
430
-
431
- call! (builder, entry_f, wrapper_args)
432
-
433
- ret! (builder)
434
-
435
- dispose (builder)
436
414
end
437
415
438
- # early-inline the original entry function into the wrapper
439
- push! (function_attributes (entry_f), EnumAttribute (" alwaysinline" , 0 ; ctx))
440
- linkage! (entry_f, LLVM. API. LLVMInternalLinkage)
441
-
442
- # copy debug info
443
- sp = LLVM. get_subprogram (entry_f)
444
- if sp != = nothing
445
- LLVM. set_subprogram! (wrapper_f, sp)
446
- end
447
-
448
- fixup_metadata! (entry_f)
449
- ModulePassManager () do pm
450
- always_inliner! (pm)
451
- run! (pm, mod)
452
- end
453
-
454
- return wrapper_f
455
- end
456
-
457
- # HACK: get rid of invariant.load and const TBAA metadata on loads from pointer args,
458
- # since storing to a stack slot violates the semantics of those attributes.
459
- # TODO : can we emit a wrapper that doesn't violate Julia's metadata?
460
- function fixup_metadata! (f:: LLVM.Function )
461
- for param in parameters (f)
462
- if isa (llvmtype (param), LLVM. PointerType)
463
- # collect all uses of the pointer
416
+ # fixup metadata
417
+ #
418
+ # Julia emits invariant.load and const TBAA metadata on loads from pointer args,
419
+ # which is invalid now that we have materialized the byval.
420
+ for (i, param) in enumerate (parameters (f))
421
+ if byval[i]
422
+ # collect all uses of the argument
464
423
worklist = Vector {LLVM.Instruction} (user .(collect (uses (param))))
465
424
while ! isempty (worklist)
466
425
value = popfirst! (worklist)
@@ -480,11 +439,67 @@ function fixup_metadata!(f::LLVM.Function)
480
439
isa (value, LLVM. AddrSpaceCastInst)
481
440
append! (worklist, user .(collect (uses (value))))
482
441
end
442
+ end
443
+ end
444
+ end
445
+
446
+ # generate the new function type & definition
447
+ new_types = LLVM. LLVMType[]
448
+ for (i, param) in enumerate (parameters (ft))
449
+ if byval[i]
450
+ push! (new_types, eltype (param:: LLVM.PointerType ))
451
+ else
452
+ push! (new_types, param)
453
+ end
454
+ end
455
+ new_ft = LLVM. FunctionType (return_type (ft), new_types)
456
+ new_f = LLVM. Function (mod, " " , new_ft)
457
+ linkage! (new_f, linkage (f))
458
+
459
+ # emit IR performing the "conversions"
460
+ new_args = LLVM. Value[]
461
+ Builder (ctx) do builder
462
+ entry = BasicBlock (new_f, " entry" ; ctx)
463
+ position! (builder, entry)
483
464
484
- # IMPORTANT NOTE: if we ever want to inline functions at the LLVM level,
485
- # we need to recurse into call instructions here, and strip metadata from
486
- # called functions (see CUDAnative.jl#238).
465
+ # perform argument conversions
466
+ for (i, param) in enumerate (parameters (ft))
467
+ if byval[i]
468
+ # copy the argument value to a stack slot, and reference it.
469
+ ptr = alloca! (builder, eltype (param))
470
+ if LLVM. addrspace (param) != 0
471
+ ptr = addrspacecast! (builder, ptr, param)
472
+ end
473
+ store! (builder, parameters (new_f)[i], ptr)
474
+ push! (new_args, ptr)
475
+ else
476
+ push! (new_args, parameters (new_f)[i])
477
+ for attr in collect (parameter_attributes (f, i))
478
+ push! (parameter_attributes (new_f, i), attr)
479
+ end
487
480
end
488
481
end
482
+
483
+ # inline the old IR
484
+ value_map = Dict {LLVM.Value, LLVM.Value} (
485
+ param => new_args[i] for (i,param) in enumerate (parameters (f))
486
+ )
487
+ clone_into! (new_f, f; value_map,
488
+ changes= LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges)
489
+ # NOTE: we need global changes because LLVM 12 wants to clone debug metadata
490
+
491
+ # fall through
492
+ br! (builder, blocks (new_f)[2 ])
489
493
end
494
+
495
+ # remove the old function
496
+ # NOTE: if we ever have legitimate uses of the old function, create a shim instead
497
+ fn = LLVM. name (f)
498
+ @assert isempty (uses (f))
499
+ # XXX : there may still be metadata using this function. RAUW updates those,
500
+ # but asserts on a debug build due to the updated function type.
501
+ unsafe_delete! (mod, f)
502
+ LLVM. name! (new_f, fn)
503
+
504
+ return new_f
490
505
end
0 commit comments