-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[NVPTX] Add support for clamped funnel shift intrinsics #113228
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,8 +14,12 @@ | |
| #include "llvm/CodeGen/BasicTTIImpl.h" | ||
| #include "llvm/CodeGen/CostTable.h" | ||
| #include "llvm/CodeGen/TargetLowering.h" | ||
| #include "llvm/IR/Constants.h" | ||
| #include "llvm/IR/Intrinsics.h" | ||
| #include "llvm/IR/IntrinsicsNVPTX.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/IR/Value.h" | ||
| #include "llvm/Support/Casting.h" | ||
| #include "llvm/Transforms/InstCombine/InstCombiner.h" | ||
| #include <optional> | ||
| using namespace llvm; | ||
|
|
||
|
|
@@ -134,6 +138,7 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { | |
| // simplify. | ||
| enum SpecialCase { | ||
| SPC_Reciprocal, | ||
| SCP_FunnelShiftClamp, | ||
| }; | ||
|
|
||
| // SimplifyAction is a poor-man's variant (plus an additional flag) that | ||
|
|
@@ -314,6 +319,10 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { | |
| case Intrinsic::nvvm_rcp_rn_d: | ||
| return {SPC_Reciprocal, FTZ_Any}; | ||
|
|
||
| case Intrinsic::nvvm_fshl_clamp: | ||
| case Intrinsic::nvvm_fshr_clamp: | ||
| return {SCP_FunnelShiftClamp, FTZ_Any}; | ||
|
|
||
| // We do not currently simplify intrinsics that give an approximate | ||
| // answer. These include: | ||
| // | ||
|
|
@@ -384,6 +393,22 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { | |
| return BinaryOperator::Create( | ||
| Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1), | ||
| II->getArgOperand(0), II->getName()); | ||
|
|
||
| case SCP_FunnelShiftClamp: { | ||
| // Canonicalize a clamping funnel shift to the generic llvm funnel shift | ||
| // when possible, as this is easier for llvm to optimize further. | ||
| if (const auto *ShiftConst = dyn_cast<ConstantInt>(II->getArgOperand(2))) { | ||
| if (ShiftConst->getZExtValue() >= II->getType()->getIntegerBitWidth()) | ||
| return IC.replaceInstUsesWith(*II, II->getArgOperand(1)); | ||
|
||
|
|
||
| const bool IsLeft = II->getIntrinsicID() == Intrinsic::nvvm_fshl_clamp; | ||
| const unsigned FshIID = IsLeft ? Intrinsic::fshl : Intrinsic::fshr; | ||
| return CallInst::Create(Intrinsic::getOrInsertDeclaration( | ||
| II->getModule(), FshIID, II->getType()), | ||
| SmallVector<Value *, 3>(II->args())); | ||
| } | ||
| return nullptr; | ||
| } | ||
| } | ||
| llvm_unreachable("All SpecialCase enumerators should be handled in switch."); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc < %s -march=nvptx -mcpu=sm_61 | FileCheck %s | ||
| ; RUN: llc < %s -march=nvptx64 -mcpu=sm_61 | FileCheck %s | ||
|
|
||
| target triple = "nvptx-nvidia-cuda" | ||
|
|
||
| declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32) | ||
| declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32) | ||
|
|
||
| define i32 @fshr_clamp_r(i32 %a, i32 %b, i32 %c) { | ||
|
||
| ; CHECK-LABEL: fshr_clamp_r( | ||
| ; CHECK: { | ||
| ; CHECK-NEXT: .reg .b32 %r<5>; | ||
| ; CHECK-EMPTY: | ||
| ; CHECK-NEXT: // %bb.0: | ||
| ; CHECK-NEXT: ld.param.u32 %r1, [fshr_clamp_r_param_0]; | ||
| ; CHECK-NEXT: ld.param.u32 %r2, [fshr_clamp_r_param_1]; | ||
| ; CHECK-NEXT: ld.param.u32 %r3, [fshr_clamp_r_param_2]; | ||
| ; CHECK-NEXT: shf.r.clamp.b32 %r4, %r2, %r1, %r3; | ||
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; | ||
| ; CHECK-NEXT: ret; | ||
| %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c) | ||
| ret i32 %call | ||
| } | ||
|
|
||
| define i32 @fshl_clamp_r(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK-LABEL: fshl_clamp_r( | ||
| ; CHECK: { | ||
| ; CHECK-NEXT: .reg .b32 %r<5>; | ||
| ; CHECK-EMPTY: | ||
| ; CHECK-NEXT: // %bb.0: | ||
| ; CHECK-NEXT: ld.param.u32 %r1, [fshl_clamp_r_param_0]; | ||
| ; CHECK-NEXT: ld.param.u32 %r2, [fshl_clamp_r_param_1]; | ||
| ; CHECK-NEXT: ld.param.u32 %r3, [fshl_clamp_r_param_2]; | ||
| ; CHECK-NEXT: shf.l.clamp.b32 %r4, %r2, %r1, %r3; | ||
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; | ||
| ; CHECK-NEXT: ret; | ||
| %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c) | ||
| ret i32 %call | ||
| } | ||
|
|
||
| define i32 @fshr_clamp_i(i32 %a, i32 %b, i32 %c) { | ||
|
||
| ; CHECK-LABEL: fshr_clamp_i( | ||
| ; CHECK: { | ||
| ; CHECK-NEXT: .reg .b32 %r<4>; | ||
| ; CHECK-EMPTY: | ||
| ; CHECK-NEXT: // %bb.0: | ||
| ; CHECK-NEXT: ld.param.u32 %r1, [fshr_clamp_i_param_0]; | ||
| ; CHECK-NEXT: ld.param.u32 %r2, [fshr_clamp_i_param_1]; | ||
| ; CHECK-NEXT: shf.r.clamp.b32 %r3, %r2, %r1, 3; | ||
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; | ||
| ; CHECK-NEXT: ret; | ||
| %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3) | ||
| ret i32 %call | ||
| } | ||
|
|
||
| define i32 @fshl_clamp_i(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK-LABEL: fshl_clamp_i( | ||
| ; CHECK: { | ||
| ; CHECK-NEXT: .reg .b32 %r<4>; | ||
| ; CHECK-EMPTY: | ||
| ; CHECK-NEXT: // %bb.0: | ||
| ; CHECK-NEXT: ld.param.u32 %r1, [fshl_clamp_i_param_0]; | ||
| ; CHECK-NEXT: ld.param.u32 %r2, [fshl_clamp_i_param_1]; | ||
| ; CHECK-NEXT: shf.l.clamp.b32 %r3, %r2, %r1, 3; | ||
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; | ||
| ; CHECK-NEXT: ret; | ||
| %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3) | ||
| ret i32 %call | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -384,6 +384,34 @@ define float @test_sqrt_rn_f_ftz(float %a) #0 { | |
| ret float %ret | ||
| } | ||
|
|
||
| ; CHECK-LABEL: @test_fshl_clamp_1 | ||
| define i32 @test_fshl_clamp_1(i32 %a, i32 %b, i32 %c) { | ||
|
||
| ; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 3) | ||
| %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3) | ||
| ret i32 %call | ||
| } | ||
|
|
||
| ; CHECK-LABEL: @test_fshl_clamp_2 | ||
| define i32 @test_fshl_clamp_2(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK: ret i32 %b | ||
| %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 300) | ||
| ret i32 %call | ||
| } | ||
|
|
||
| ; CHECK-LABEL: @test_fshr_clamp_1 | ||
| define i32 @test_fshr_clamp_1(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 29) | ||
| %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3) | ||
| ret i32 %call | ||
| } | ||
|
|
||
| ; CHECK-LABEL: @test_fshr_clamp_2 | ||
| define i32 @test_fshr_clamp_2(i32 %a, i32 %b, i32 %c) { | ||
| ; CHECK: ret i32 %b | ||
|
||
| %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 300) | ||
Artem-B marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ret i32 %call | ||
| } | ||
|
|
||
| declare double @llvm.nvvm.add.rn.d(double, double) | ||
| declare float @llvm.nvvm.add.rn.f(float, float) | ||
| declare float @llvm.nvvm.add.rn.ftz.f(float, float) | ||
|
|
@@ -454,3 +482,5 @@ declare double @llvm.nvvm.ui2d.rn(i32) | |
| declare float @llvm.nvvm.ui2f.rn(i32) | ||
| declare double @llvm.nvvm.ull2d.rn(i64) | ||
| declare float @llvm.nvvm.ull2f.rn(i64) | ||
| declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32) | ||
| declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we rename `%a`, `%b`, `%c` -> `%hi`, `%lo`, `%n` so the purpose of each argument is obvious from the function syntax alone, without having to read and parse a paragraph-long sentence?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed