@@ -116,47 +116,10 @@ function process_entry!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
116
116
mod:: LLVM.Module , entry:: LLVM.Function )
117
117
invoke (process_entry!, Tuple{CompilerJob, LLVM. Module, LLVM. Function}, job, mod, entry)
118
118
119
- ctx = context (mod)
120
119
if job. source. kernel
121
120
# work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92)
122
121
entry = lower_byval (job, mod, entry)
123
122
124
- # property annotations
125
- annotations = Metadata[entry]
126
-
127
- # # kernel metadata
128
- append! (annotations, [MDString (" kernel" ; ctx),
129
- ConstantInt (Int32 (1 ); ctx)])
130
-
131
- # # expected CTA sizes
132
- if job. target. minthreads != = nothing
133
- for (dim, name) in enumerate ([:x , :y , :z ])
134
- bound = dim <= length (job. target. minthreads) ? job. target. minthreads[dim] : 1
135
- append! (annotations, [MDString (" reqntid$name " ; ctx),
136
- ConstantInt (Int32 (bound); ctx)])
137
- end
138
- end
139
- if job. target. maxthreads != = nothing
140
- for (dim, name) in enumerate ([:x , :y , :z ])
141
- bound = dim <= length (job. target. maxthreads) ? job. target. maxthreads[dim] : 1
142
- append! (annotations, [MDString (" maxntid$name " ; ctx),
143
- ConstantInt (Int32 (bound); ctx)])
144
- end
145
- end
146
-
147
- if job. target. blocks_per_sm != = nothing
148
- append! (annotations, [MDString (" minctasm" ; ctx),
149
- ConstantInt (Int32 (job. target. blocks_per_sm); ctx)])
150
- end
151
-
152
- if job. target. maxregs != = nothing
153
- append! (annotations, [MDString (" maxnreg" ; ctx),
154
- ConstantInt (Int32 (job. target. maxregs); ctx)])
155
- end
156
-
157
- push! (metadata (mod)[" nvvm.annotations" ], MDNode (annotations; ctx))
158
-
159
-
160
123
if LLVM. version () >= v " 8"
161
124
# calling convention
162
125
callconv! (entry, LLVM. API. LLVMPTXKernelCallConv)
168
131
169
132
function add_lowering_passes! (@nospecialize (job:: CompilerJob{PTXCompilerTarget} ),
170
133
pm:: LLVM.PassManager )
134
+ # hide `unreachable` from LLVM so that it doesn't introduce divergent control flow
171
135
if ! job. target. unreachable
172
136
add! (pm, FunctionPass (" HideUnreachable" , hide_unreachable!))
173
137
end
@@ -208,6 +172,62 @@ function optimize_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
208
172
end
209
173
end
210
174
175
# Attach the `nvvm.annotations` metadata describing the kernel entry point to `mod`.
#
# Nothing is added unless `job.source.kernel` is set. For kernels, a single metadata node is
# pushed that starts with the entry function and is followed by key/value pairs:
# `kernel` (always 1), then `reqntid{x,y,z}`, `maxntid{x,y,z}`, `minctasm` and `maxnreg`
# for whichever of the corresponding target fields are not `nothing`.
#
# NOTE: we need to do this as late as possible, because otherwise the metadata (which
#       refers to a specific function) can get lost when cloning functions. normally
#       RAUW updates those references, but we can't RAUW with a changed function type.
function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
                        mod::LLVM.Module)
    ctx = context(mod)

    # only kernels carry annotations for the assembler
    job.source.kernel || return nothing

    # locate the entry point: the first function using the PTX kernel calling convention
    # XXX: make this an argument to `emit_asm` again?
    # NOTE(review): the visible part of `process_entry!` only sets this calling convention
    #               on LLVM >= 8 -- confirm older versions never reach this point.
    entry = nothing
    for candidate in functions(mod)
        callconv(candidate) == LLVM.API.LLVMPTXKernelCallConv || continue
        entry = candidate
        break
    end
    @assert entry !== nothing

    # the annotation node starts with the function it applies to
    annotations = Metadata[entry]

    # every property is a (name, 32-bit integer) pair appended to the node operands
    annotate!(key, value) =
        append!(annotations, [MDString(key; ctx), ConstantInt(Int32(value); ctx)])

    ## kernel metadata
    annotate!("kernel", 1)

    ## expected CTA sizes: required (`reqntid`) first, then maximum (`maxntid`);
    ## dimensions that are not specified default to 1
    thread_bounds = (("reqntid", job.target.minthreads),
                     ("maxntid", job.target.maxthreads))
    for (prefix, threads) in thread_bounds
        threads === nothing && continue
        for (dim, name) in enumerate((:x, :y, :z))
            bound = dim <= length(threads) ? threads[dim] : 1
            annotate!("$prefix$name", bound)
        end
    end

    ## occupancy hints
    blocks_per_sm = job.target.blocks_per_sm
    blocks_per_sm === nothing || annotate!("minctasm", blocks_per_sm)

    maxregs = job.target.maxregs
    maxregs === nothing || annotate!("maxnreg", maxregs)

    return push!(metadata(mod)["nvvm.annotations"], MDNode(annotations; ctx))
end
230
+
211
231
function llvm_debug_info (@nospecialize (job:: CompilerJob{PTXCompilerTarget} ))
212
232
# allow overriding the debug info from CUDA.jl
213
233
if job. target. debuginfo
0 commit comments