Skip to content

Commit fa21d88

Browse files
committed
AMDGPU: Allow clamp for pattern x & (-1 >> (32 - y)) to "bfe x, 0, y"
It is fine to match the pattern when y is known to have at most five active bits (i.e. y < 32).
1 parent 24a19f1 commit fa21d88

File tree

4 files changed

+25
-1
lines changed

4 files changed

+25
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include "SIISelLowering.h"
2323
#include "SIMachineFunctionInfo.h"
2424
#include "llvm/Analysis/UniformityAnalysis.h"
25-
#include "llvm/Analysis/ValueTracking.h"
2625
#include "llvm/CodeGen/FunctionLoweringInfo.h"
2726
#include "llvm/CodeGen/SelectionDAG.h"
2827
#include "llvm/CodeGen/SelectionDAGISel.h"

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "GCNSubtarget.h"
1818
#include "SIMachineFunctionInfo.h"
1919
#include "SIModeRegisterDefaults.h"
20+
#include "llvm/Analysis/ValueTracking.h"
2021
#include "llvm/CodeGen/SelectionDAGISel.h"
2122
#include "llvm/Target/TargetMachine.h"
2223

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3550,6 +3550,16 @@ def : AMDGPUPat <
35503550
(V_BFE_U32_e64 $src, (i32 0), $width)
35513551
>;
35523552

3553+
def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{
3554+
return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxActiveBits() <= 5;
3555+
}]>;
3556+
3557+
// x & (-1 >> (bitwidth - y))
3558+
def : AMDGPUPat <
3559+
(DivergentBinFrag<and> i32:$src, (srl_oneuse -1, (sub 32, uint5Bits:$width))),
3560+
(V_BFE_U32_e64 $src, (i32 0), $width)
3561+
>;
3562+
35533563
// SHA-256 Ma patterns
35543564

35553565
// ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y

llvm/test/CodeGen/AMDGPU/extract-lowbits.ll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,20 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
120120
ret i32 %masked
121121
}
122122

123+
define i32 @bzhi32_c0_clamp(i32 %val, i32 %numlowbits) nounwind {
124+
; GCN-LABEL: bzhi32_c0_clamp:
125+
; GCN: ; %bb.0:
126+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127+
; GCN-NEXT: v_and_b32_e32 v1, 31, v1
128+
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
129+
; GCN-NEXT: s_setpc_b64 s[30:31]
130+
%low5bits = and i32 %numlowbits, 31
131+
%numhighbits = sub i32 32, %low5bits
132+
%mask = lshr i32 -1, %numhighbits
133+
%masked = and i32 %mask, %val
134+
ret i32 %masked
135+
}
136+
123137
define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
124138
; SI-LABEL: bzhi32_c1_indexzext:
125139
; SI: ; %bb.0:

0 commit comments

Comments
 (0)