diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 665db3025903e..80e10f33b770d 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -45,6 +45,7 @@ // * llvm.nvvm.ldg.global.i --> load addrspace(1) !load.invariant // * llvm.nvvm.ldg.global.f --> ibid. // * llvm.nvvm.ldg.global.p --> ibid. +// * llvm.nvvm.swap.lo.hi.b64 --> llvm.fshl(x, x, 32) def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr @@ -4635,12 +4636,6 @@ def int_nvvm_sust_p_3d_v4i32_trap "llvm.nvvm.sust.p.3d.v4i32.trap">, ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">; -def int_nvvm_swap_lo_hi_b64 - : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], - [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">, - ClangBuiltin<"__nvvm_swap_lo_hi_b64">; - - // Accessing special registers. class PTXReadSRegIntrinsicNB_r32 diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 7a194219c5cd4..9be307bb071ed 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1292,7 +1292,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, if (Name.consume_front("abs.")) // nvvm.abs.{i,ii} Expand = Name == "i" || Name == "ll"; - else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f") + else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" || + Name == "swap.lo.hi.b64") Expand = true; else if (Name.consume_front("max.") || Name.consume_front("min.")) // nvvm.{min,max}.{i,ii,ui,ull} @@ -2370,6 +2371,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr, {Arg, Arg, ZExtShiftAmt}); + } else if (Name == "swap.lo.hi.b64") { + Type *Int64Ty = Builder.getInt64Ty(); + Value *Arg = CI->getOperand(0); + Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl, + {Arg, Arg, Builder.getInt64(32)}); } else if ((Name.consume_front("ptr.gen.to.") && (Name.starts_with("local") || Name.starts_with("shared") || Name.starts_with("global") || Name.starts_with("constant"))) || diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 90f56a421b19b..b2e05a567b4fe 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2947,11 +2947,6 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; - -def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src), - (V2I32toI64 (I64toI32H $src), - (I64toI32L $src))> ; - //----------------------------------- // Texture Intrinsics //----------------------------------- diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 5cc3a30277459..588e79a7428a4 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -34,6 +34,7 @@ declare double @llvm.nvvm.bitcast.ll2d(i64) declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) +declare i64 @llvm.nvvm.swap.lo.hi.b64(i64) declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) @@ -166,10 +167,12 @@ define void @rotate(i32 %a, i64 %b) { ; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6) ; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7) ; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8) +; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 32) ; %r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6) %r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7) %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) + %r4 = call i64 @llvm.nvvm.swap.lo.hi.b64(i64 %b) ret void }