52
52
53
53
function finish_module! (@nospecialize (job:: CompilerJob{GCNCompilerTarget} ),
54
54
mod:: LLVM.Module , entry:: LLVM.Function )
55
- # we have to fake our target early in the pipeline because Julia's optimization passes
56
- # weren't designed for a non-0 stack addrspace, and the AMDGPU target is very strict
57
- # about which addrspaces are permitted for various code patterns
58
- triple! (mod, llvm_triple (NativeCompilerTarget ()))
59
- datalayout! (mod, julia_datalayout (NativeCompilerTarget ()))
60
-
61
55
entry = invoke (finish_module!, Tuple{CompilerJob, LLVM. Module, LLVM. Function}, job, mod, entry)
62
56
63
57
if job. source. kernel
@@ -68,6 +62,17 @@ function finish_module!(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
68
62
return entry
69
63
end
70
64
65
function optimize!(job::CompilerJob{GCNCompilerTarget}, mod::LLVM.Module)
    # Pretend to be the native target for the duration of Julia's optimization
    # pipeline: those passes were not designed for a non-zero stack address
    # space, and the AMDGPU backend is strict about which address spaces are
    # permitted in various code patterns.
    native = NativeCompilerTarget()
    triple!(mod, llvm_triple(native))
    datalayout!(mod, julia_datalayout(native))

    # Defer to the generic CompilerJob optimization pipeline.
    invoke(optimize!, Tuple{CompilerJob, LLVM.Module}, job, mod)
end
75
+
# We need to do alloca rewriting (from 0 to 5) after Julia's optimization
# passes because of two reasons:
# 1. Debug builds call the target verifier first, which would trip if AMDGPU
0 commit comments