Skip to content

Commit 7fecf8e

Browse files
committed
[RISCV] Fix incorrect folding of select on ctlz/cttz
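This patch addresses #155014. The `ctlz`/`cttz` -> `icmp` -> `select` pattern can occur in code that accounts for targets which don't support `cttz(0)` or `ctlz(0)`. We can replace the select with a mask of `BitWidth - 1`, but **only on power-of-2 bitwidths**: for a zero input the count equals `BitWidth`, and `BitWidth & (BitWidth - 1)` is 0 only when `BitWidth` is a power of two (e.g. for `i7`, `7 & 6` is 6, not 0).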
1 parent dc3d14a commit 7fecf8e

File tree

2 files changed

+22
-9
lines changed

2 files changed

+22
-9
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18746,6 +18746,10 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
1874618746
if (Cond->getOperand(0) != CountZeroesArgument)
1874718747
return SDValue();
1874818748

18749+
unsigned BitWidth = CountZeroes.getValueSizeInBits();
18750+
if (!isPowerOf2_32(BitWidth))
18751+
return SDValue();
18752+
1874918753
if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
1875018754
CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
1875118755
CountZeroes.getValueType(), CountZeroesArgument);
@@ -18754,7 +18758,6 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
1875418758
CountZeroes.getValueType(), CountZeroesArgument);
1875518759
}
1875618760

18757-
unsigned BitWidth = CountZeroes.getValueSizeInBits();
1875818761
SDValue BitWidthMinusOne =
1875918762
DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
1876018763

llvm/test/CodeGen/RISCV/ctlz-cttz-select-fold.ll

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,12 @@ define i7 @test_cttz_select_i7(i7 %0) {
4848
;
4949
; RV32ZBB-LABEL: test_cttz_select_i7:
5050
; RV32ZBB: # %bb.0:
51+
; RV32ZBB-NEXT: andi a1, a0, 127
5152
; RV32ZBB-NEXT: ori a0, a0, 128
5253
; RV32ZBB-NEXT: ctz a0, a0
53-
; RV32ZBB-NEXT: andi a0, a0, 6
54+
; RV32ZBB-NEXT: seqz a1, a1
55+
; RV32ZBB-NEXT: addi a1, a1, -1
56+
; RV32ZBB-NEXT: and a0, a1, a0
5457
; RV32ZBB-NEXT: ret
5558
;
5659
; RV64-LABEL: test_cttz_select_i7:
@@ -98,9 +101,12 @@ define i7 @test_cttz_select_i7(i7 %0) {
98101
;
99102
; RV64ZBB-LABEL: test_cttz_select_i7:
100103
; RV64ZBB: # %bb.0:
104+
; RV64ZBB-NEXT: andi a1, a0, 127
101105
; RV64ZBB-NEXT: ori a0, a0, 128
102106
; RV64ZBB-NEXT: ctz a0, a0
103-
; RV64ZBB-NEXT: andi a0, a0, 6
107+
; RV64ZBB-NEXT: seqz a1, a1
108+
; RV64ZBB-NEXT: addi a1, a1, -1
109+
; RV64ZBB-NEXT: and a0, a1, a0
104110
; RV64ZBB-NEXT: ret
105111
%2 = call i7 @llvm.cttz.i7(i7 %0, i1 false)
106112
%3 = icmp eq i7 %0, 0
@@ -159,9 +165,11 @@ define i10 @test_ctlz_select_i10(i10 %0) {
159165
; RV32ZBB-LABEL: test_ctlz_select_i10:
160166
; RV32ZBB: # %bb.0:
161167
; RV32ZBB-NEXT: andi a0, a0, 1023
162-
; RV32ZBB-NEXT: clz a0, a0
163-
; RV32ZBB-NEXT: addi a0, a0, -22
164-
; RV32ZBB-NEXT: andi a0, a0, 9
168+
; RV32ZBB-NEXT: clz a1, a0
169+
; RV32ZBB-NEXT: seqz a0, a0
170+
; RV32ZBB-NEXT: addi a1, a1, -22
171+
; RV32ZBB-NEXT: addi a0, a0, -1
172+
; RV32ZBB-NEXT: and a0, a0, a1
165173
; RV32ZBB-NEXT: ret
166174
;
167175
; RV64-LABEL: test_ctlz_select_i10:
@@ -219,9 +227,11 @@ define i10 @test_ctlz_select_i10(i10 %0) {
219227
; RV64ZBB-LABEL: test_ctlz_select_i10:
220228
; RV64ZBB: # %bb.0:
221229
; RV64ZBB-NEXT: andi a0, a0, 1023
222-
; RV64ZBB-NEXT: clz a0, a0
223-
; RV64ZBB-NEXT: addi a0, a0, -54
224-
; RV64ZBB-NEXT: andi a0, a0, 9
230+
; RV64ZBB-NEXT: clz a1, a0
231+
; RV64ZBB-NEXT: seqz a0, a0
232+
; RV64ZBB-NEXT: addi a1, a1, -54
233+
; RV64ZBB-NEXT: addi a0, a0, -1
234+
; RV64ZBB-NEXT: and a0, a0, a1
225235
; RV64ZBB-NEXT: ret
226236
%2 = call i10 @llvm.ctlz.i10(i10 %0, i1 false)
227237
%3 = icmp eq i10 %0, 0

0 commit comments

Comments
 (0)