File tree Expand file tree Collapse file tree 2 files changed +18
-4
lines changed Expand file tree Collapse file tree 2 files changed +18
-4
lines changed Original file line number Diff line number Diff line change @@ -262,9 +262,16 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
262262 auto fence_type_ptr = as_const_int (op->args [0 ]);
263263 internal_assert (fence_type_ptr) << " gpu_thread_barrier() parameter is not a constant integer.\n " ;
264264
265- llvm::Function *barrier0 = module ->getFunction (" llvm.nvvm.barrier0" );
266- internal_assert (barrier0) << " Could not find PTX barrier intrinsic (llvm.nvvm.barrier0)\n " ;
267- builder->CreateCall (barrier0);
265+ llvm::Function *barrier;
266+ if ((barrier = module ->getFunction (" llvm.nvvm.barrier.cta.sync.aligned.all" )) && barrier->getIntrinsicID () != 0 ) {
267+ // LLVM 20.1.6 and above: https://github.com/llvm/llvm-project/pull/140615
268+ builder->CreateCall (barrier, builder->getInt32 (0 ));
269+ } else if ((barrier = module ->getFunction (" llvm.nvvm.barrier0" )) && barrier->getIntrinsicID () != 0 ) {
270+ // LLVM 20.1.5 and below: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 20
271+ builder->CreateCall (barrier);
272+ } else {
273+ internal_error << " Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n " ;
274+ }
268275 value = ConstantInt::get (i32_t , 0 );
269276 return ;
270277 }
Original file line number Diff line number Diff line change 1- declare void @llvm.nvvm.barrier0 ()
1+ ; The two forward-declared intrinsics below refer to the same barrier operation.
2+ ; LLVM 20.1.6 introduced a new naming scheme for these intrinsics.
3+ ; We have to declare both so that they can be looked up through the Module's
4+ ; getFunction(), but one of them will map to an intrinsic, and we use
5+ ; that fact to determine which of the two names is supported by LLVM.
6+ declare void @llvm.nvvm.barrier0 () ; LLVM <=20.1.5
7+ declare void @llvm.nvvm.barrier.cta.sync.aligned.all (i32 ) ; LLVM >=20.1.6
8+
29declare i32 @llvm.nvvm.read.ptx.sreg.tid.x ()
310declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x ()
411declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x ()
You can’t perform that action at this time.
0 commit comments