Skip to content

Commit 4ce74bf

Browse files
authored
[RISCV] Use default promotion for i32 CTLZ on RV64 with XTHeadBb. (#157994)
The existing isel pattern felt like it was emitting more instructions than an isel pattern probably should. We were also missing opportunities to fold the innermost instructions with surrounding instructions. I tried to move the expansion to lowering, but we got a little too aggressive folding the (not (slli (not))) with other operations in some tests and created code with constants that are hard to materialize and missed using TH_FF0. We could probably have fixed that with a RISCVISD::TH_FF0 node. While investigating, I tried using the default promotion. The results aren't obviously worse than the previous codegen, and in some cases they are obviously better.
1 parent 3e18b5a commit 4ce74bf

File tree

4 files changed

+17
-25
lines changed

4 files changed

+17
-25
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -422,12 +422,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
422422
(Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
423423
// We need the custom lowering to make sure that the resulting sequence
424424
// for the 32bit case is efficient on 64bit targets.
425-
if (Subtarget.is64Bit()) {
426-
setOperationAction(ISD::CTLZ, MVT::i32, Custom);
427-
// Use default promotion for XTHeadBb.
428-
if (Subtarget.hasStdExtZbb())
429-
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
430-
}
425+
// Use default promotion for i32 without Zbb.
426+
if (Subtarget.is64Bit() && Subtarget.hasStdExtZbb())
427+
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
431428
} else {
432429
setOperationAction(ISD::CTLZ, XLenVT, Expand);
433430
}

llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,8 +594,6 @@ def : Pat<(i64 (sra (bswap GPR:$rs1), (i64 32))),
594594
(TH_REVW GPR:$rs1)>;
595595
def : Pat<(binop_allwusers<srl> (bswap GPR:$rs1), (i64 32)),
596596
(TH_REVW GPR:$rs1)>;
597-
def : Pat<(riscv_clzw GPR:$rs1),
598-
(TH_FF0 (i64 (SLLI (i64 (XORI GPR:$rs1, -1)), 32)))>;
599597
} // Predicates = [HasVendorXTHeadBb, IsRV64]
600598

601599
let Predicates = [HasVendorXTHeadBs] in {

llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1375,9 +1375,9 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
13751375
;
13761376
; RV64XTHEADBB-LABEL: test_ctlz_i32:
13771377
; RV64XTHEADBB: # %bb.0:
1378-
; RV64XTHEADBB-NEXT: not a0, a0
1379-
; RV64XTHEADBB-NEXT: slli a0, a0, 32
1380-
; RV64XTHEADBB-NEXT: th.ff0 a0, a0
1378+
; RV64XTHEADBB-NEXT: th.extu a0, a0, 31, 0
1379+
; RV64XTHEADBB-NEXT: th.ff1 a0, a0
1380+
; RV64XTHEADBB-NEXT: addi a0, a0, -32
13811381
; RV64XTHEADBB-NEXT: ret
13821382
%tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
13831383
ret i32 %tmp

llvm/test/CodeGen/RISCV/rv64xtheadbb.ll

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
5252
;
5353
; RV64XTHEADBB-NOB-LABEL: ctlz_i32:
5454
; RV64XTHEADBB-NOB: # %bb.0:
55-
; RV64XTHEADBB-NOB-NEXT: not a0, a0
56-
; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
57-
; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
55+
; RV64XTHEADBB-NOB-NEXT: th.extu a0, a0, 31, 0
56+
; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
57+
; RV64XTHEADBB-NOB-NEXT: addi a0, a0, -32
5858
; RV64XTHEADBB-NOB-NEXT: ret
5959
;
6060
; RV64XTHEADBB-B-LABEL: ctlz_i32:
@@ -112,10 +112,9 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
112112
;
113113
; RV64XTHEADBB-NOB-LABEL: log2_i32:
114114
; RV64XTHEADBB-NOB: # %bb.0:
115-
; RV64XTHEADBB-NOB-NEXT: not a0, a0
116-
; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
117-
; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
118-
; RV64XTHEADBB-NOB-NEXT: li a1, 31
115+
; RV64XTHEADBB-NOB-NEXT: th.extu a0, a0, 31, 0
116+
; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
117+
; RV64XTHEADBB-NOB-NEXT: li a1, 63
119118
; RV64XTHEADBB-NOB-NEXT: sub a0, a1, a0
120119
; RV64XTHEADBB-NOB-NEXT: ret
121120
;
@@ -177,10 +176,9 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
177176
; RV64XTHEADBB-NOB-LABEL: log2_ceil_i32:
178177
; RV64XTHEADBB-NOB: # %bb.0:
179178
; RV64XTHEADBB-NOB-NEXT: addi a0, a0, -1
180-
; RV64XTHEADBB-NOB-NEXT: not a0, a0
181-
; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
182-
; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
183-
; RV64XTHEADBB-NOB-NEXT: li a1, 32
179+
; RV64XTHEADBB-NOB-NEXT: th.extu a0, a0, 31, 0
180+
; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
181+
; RV64XTHEADBB-NOB-NEXT: li a1, 64
184182
; RV64XTHEADBB-NOB-NEXT: sub a0, a1, a0
185183
; RV64XTHEADBB-NOB-NEXT: ret
186184
;
@@ -309,9 +307,8 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
309307
; RV64XTHEADBB-NOB-LABEL: ctlz_lshr_i32:
310308
; RV64XTHEADBB-NOB: # %bb.0:
311309
; RV64XTHEADBB-NOB-NEXT: srliw a0, a0, 1
312-
; RV64XTHEADBB-NOB-NEXT: not a0, a0
313-
; RV64XTHEADBB-NOB-NEXT: slli a0, a0, 32
314-
; RV64XTHEADBB-NOB-NEXT: th.ff0 a0, a0
310+
; RV64XTHEADBB-NOB-NEXT: th.ff1 a0, a0
311+
; RV64XTHEADBB-NOB-NEXT: addi a0, a0, -32
315312
; RV64XTHEADBB-NOB-NEXT: ret
316313
;
317314
; RV64XTHEADBB-B-LABEL: ctlz_lshr_i32:

0 commit comments

Comments
 (0)