187
187
188
188
function finish_ir! (@nospecialize (job:: CompilerJob{PTXCompilerTarget} ),
189
189
mod:: LLVM.Module , entry:: LLVM.Function )
190
- lower_trap! (mod)
191
190
for f in functions (mod)
192
191
lower_unreachable! (f)
193
192
end
246
245
247
246
# # LLVM passes
248
247
249
- # replace calls to `trap` with inline assembly calling `exit`, which isn't fatal
250
- function lower_trap! (mod:: LLVM.Module )
251
- job = current_job:: CompilerJob
252
- changed = false
253
- @timeit_debug to " lower trap" begin
254
-
255
- if haskey (functions (mod), " llvm.trap" )
256
- trap = functions (mod)[" llvm.trap" ]
257
-
258
- # inline assembly to exit a thread
259
- exit_ft = LLVM. FunctionType (LLVM. VoidType ())
260
- exit = InlineAsm (exit_ft, " exit;" , " " , true )
261
-
262
- for use in uses (trap)
263
- val = user (use)
264
- if isa (val, LLVM. CallInst)
265
- @dispose builder= IRBuilder () begin
266
- position! (builder, val)
267
- call! (builder, exit_ft, exit)
268
- end
269
- unsafe_delete! (LLVM. parent (val), val)
270
- changed = true
271
- end
272
- end
273
- end
274
-
275
- end
276
- return changed
277
- end
278
-
279
248
# lower `unreachable` to `exit` so that the emitted PTX has correct control flow
280
249
#
281
250
# During back-end compilation, `ptxas` inserts instructions to manage the hardware's
@@ -328,10 +297,14 @@ end
328
297
# `bar.sync` cannot be executed divergently on Pascal hardware or earlier.
329
298
#
330
299
# To avoid these fall-through successors that change the control flow,
331
- # we replace `unreachable` instructions with a call to `exit`. This informs
332
- # `ptxas` that the thread exits, and allows it to correctly construct a CFG,
333
- # and consequently correctly determine the divergence regions as intended.
300
+ # we replace `unreachable` instructions with a call to `trap` and `exit`. This
301
+ # informs `ptxas` that the thread exits, and allows it to correctly construct a
302
+ # CFG, and consequently correctly determine the divergence regions as intended.
303
+ # Note that we first emit a call to `trap`, so that the behaviour is the same
304
+ # as before.
334
305
function lower_unreachable! (f:: LLVM.Function )
306
+ mod = LLVM. parent (f)
307
+
335
308
# TODO :
336
309
# - if unreachable blocks have been merged, we still may be jumping from different
337
310
# divergent regions, potentially causing the same problem as above:
@@ -375,6 +348,12 @@ function lower_unreachable!(f::LLVM.Function)
375
348
# inline assembly to exit a thread
376
349
exit_ft = LLVM. FunctionType (LLVM. VoidType ())
377
350
exit = InlineAsm (exit_ft, " exit;" , " " , true )
351
+ trap_ft = LLVM. FunctionType (LLVM. VoidType ())
352
+ trap = if haskey (functions (mod), " llvm.trap" )
353
+ functions (mod)[" llvm.trap" ]
354
+ else
355
+ LLVM. Function (mod, " llvm.trap" , trap_ft)
356
+ end
378
357
379
358
# rewrite the unreachable terminators
380
359
@dispose builder= IRBuilder () begin
@@ -384,6 +363,7 @@ function lower_unreachable!(f::LLVM.Function)
384
363
@assert inst isa LLVM. UnreachableInst
385
364
386
365
position! (builder, inst)
366
+ call! (builder, trap_ft, trap)
387
367
call! (builder, exit_ft, exit)
388
368
end
389
369
end
0 commit comments