Skip to content

Commit 27d670f

Browse files
committed
[RISCV] Select unsigned bitfield insert for XAndesPerf
The XAndesPerf extension includes the unsigned bitfield extraction instruction `NDS.BFOZ`, which can extract bits 0 to Len - 1, place them starting at bit Msb, and zero-fill the remaining bits. This patch handles the cases where Msb < Lsb. Instruction syntax: `nds.bfoz Rd, Rs1, Msb, Lsb`. The operation is: if Msb < Lsb: Lenm1 = Lsb - Msb; Rd[Lsb:Msb] = Rs1[Lenm1:0]; if (Lsb < (XLen - 1)) Rd[XLen-1:Lsb+1] = 0; Rd[Msb-1:0] = 0; When Len == 1, it is a special case where the Msb is set to 0 instead of being equal to the Lsb.
1 parent 4d5c0f4 commit 27d670f

File tree

5 files changed

+49
-26
lines changed

5 files changed

+49
-26
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,23 @@ bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node, SDLoc DL,
688688
return true;
689689
}
690690

691+
/// Try to select \p Node as a single unsigned bitfield-insert-into-zero
/// machine instruction.
///
/// Returns false without modifying anything when the subtarget lacks the
/// XAndesPerf vendor extension. Otherwise builds an NDS_BFOZ machine node
/// taking \p X followed by \p Msb and \p Lsb as target constants, replaces
/// \p Node with it, and returns true.
///
/// The Msb/Lsb values are emitted exactly as passed; no range or ordering
/// checks are performed here, so the caller is responsible for supplying a
/// valid encoding (including any special-casing of a one-bit field).
bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node, SDLoc DL,
692+
MVT VT, SDValue X,
693+
unsigned Msb,
694+
unsigned Lsb) {
695+
// Only supported with XAndesPerf at the moment.
696+
if (!Subtarget->hasVendorXAndesPerf())
697+
return false;
698+
699+
unsigned Opc = RISCV::NDS_BFOZ;
700+
701+
// Operand order is (source, Msb, Lsb); both bit positions are immediates.
SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
702+
CurDAG->getTargetConstant(Msb, DL, VT),
703+
CurDAG->getTargetConstant(Lsb, DL, VT));
704+
ReplaceNode(Node, Ubi);
705+
return true;
706+
}
707+
691708
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
692709
// Target does not support indexed loads.
693710
if (!Subtarget->hasVendorXTHeadMemIdx())
@@ -1324,6 +1341,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
13241341
return;
13251342
}
13261343

1344+
// Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1345+
// available.
1346+
// Transform (and (shl x, c2), c1)
1347+
// -> (<bfinsert> x, msb, lsb)
1348+
// e.g.
1349+
// (and (shl x, 12), 0x00fff000)
1350+
// If XLen = 32 and C2 = 12, then
1351+
// Len = 32 - 8 - 12 = 12,
1352+
// Lsb = 32 - 8 - 1 = 23 and Msb = 12
1353+
// -> nds.bfoz x, 12, 23
1354+
const unsigned Len = XLen - Leading - C2;
1355+
const unsigned Lsb = XLen - Leading - 1;
1356+
// If Len is 1, the Msb will be 0 instead of C2.
1357+
unsigned Msb = Len == 1 ? 0 : C2;
1358+
if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1359+
return;
1360+
13271361
// (srli (slli c2+c3), c3)
13281362
if (OneUseOrZExtW && !IsCANDI) {
13291363
SDNode *SLLI = CurDAG->getMachineNode(

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
7979
bool trySignedBitfieldExtract(SDNode *Node);
8080
bool tryUnsignedBitfieldExtract(SDNode *Node, SDLoc DL, MVT VT, SDValue X,
8181
unsigned Msb, unsigned Lsb);
82+
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, SDLoc DL, MVT VT,
83+
SDValue X, unsigned Msb, unsigned Lsb);
8284
bool tryIndexedLoad(SDNode *Node);
8385

8486
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);

llvm/test/CodeGen/RISCV/rv32xandesperf.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
7979
define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
8080
; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
8181
; CHECK: # %bb.0:
82-
; CHECK-NEXT: slli a0, a0, 31
83-
; CHECK-NEXT: srli a0, a0, 16
82+
; CHECK-NEXT: nds.bfoz a0, a0, 0, 15
8483
; CHECK-NEXT: ret
8584
%shifted = shl i32 %x, 15
8685
%masked = and i32 %shifted, 32768
@@ -90,8 +89,7 @@ define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
9089
define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
9190
; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
9291
; CHECK: # %bb.0:
93-
; CHECK-NEXT: slli a0, a0, 31
94-
; CHECK-NEXT: srli a0, a0, 13
92+
; CHECK-NEXT: nds.bfoz a0, a0, 0, 18
9593
; CHECK-NEXT: ret
9694
%shl = shl i32 %x, 31
9795
%lshr = lshr i32 %shl, 13
@@ -103,8 +101,7 @@ define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
103101
define i32 @bfoz_from_and_shl_i32(i32 %x) {
104102
; CHECK-LABEL: bfoz_from_and_shl_i32:
105103
; CHECK: # %bb.0:
106-
; CHECK-NEXT: slli a0, a0, 20
107-
; CHECK-NEXT: srli a0, a0, 8
104+
; CHECK-NEXT: nds.bfoz a0, a0, 12, 23
108105
; CHECK-NEXT: ret
109106
%shifted = shl i32 %x, 12
110107
%masked = and i32 %shifted, 16773120
@@ -114,8 +111,7 @@ define i32 @bfoz_from_and_shl_i32(i32 %x) {
114111
define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
115112
; CHECK-LABEL: bfoz_from_lshr_shl_i32:
116113
; CHECK: # %bb.0:
117-
; CHECK-NEXT: slli a0, a0, 26
118-
; CHECK-NEXT: srli a0, a0, 7
114+
; CHECK-NEXT: nds.bfoz a0, a0, 19, 24
119115
; CHECK-NEXT: ret
120116
%shl = shl i32 %x, 26
121117
%lshr = lshr i32 %shl, 7

llvm/test/CodeGen/RISCV/rv64xandesperf.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
6969
define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
7070
; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
7171
; CHECK: # %bb.0:
72-
; CHECK-NEXT: slli a0, a0, 63
73-
; CHECK-NEXT: srli a0, a0, 48
72+
; CHECK-NEXT: nds.bfoz a0, a0, 0, 15
7473
; CHECK-NEXT: ret
7574
%shifted = shl i32 %x, 15
7675
%masked = and i32 %shifted, 32768
@@ -80,8 +79,7 @@ define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
8079
define i64 @bfoz_from_and_shl_with_msb_zero_i64(i64 %x) {
8180
; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i64:
8281
; CHECK: # %bb.0:
83-
; CHECK-NEXT: slli a0, a0, 63
84-
; CHECK-NEXT: srli a0, a0, 15
82+
; CHECK-NEXT: nds.bfoz a0, a0, 0, 48
8583
; CHECK-NEXT: ret
8684
%shifted = shl i64 %x, 48
8785
%masked = and i64 %shifted, 281474976710656
@@ -91,8 +89,7 @@ define i64 @bfoz_from_and_shl_with_msb_zero_i64(i64 %x) {
9189
define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
9290
; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
9391
; CHECK: # %bb.0:
94-
; CHECK-NEXT: slli a0, a0, 63
95-
; CHECK-NEXT: srli a0, a0, 45
92+
; CHECK-NEXT: nds.bfoz a0, a0, 0, 18
9693
; CHECK-NEXT: ret
9794
%shl = shl i32 %x, 31
9895
%lshr = lshr i32 %shl, 13
@@ -102,8 +99,7 @@ define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
10299
define i64 @bfoz_from_lshr_shl_with_msb_zero_i64(i64 %x) {
103100
; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i64:
104101
; CHECK: # %bb.0:
105-
; CHECK-NEXT: slli a0, a0, 63
106-
; CHECK-NEXT: srli a0, a0, 19
102+
; CHECK-NEXT: nds.bfoz a0, a0, 0, 44
107103
; CHECK-NEXT: ret
108104
%shl = shl i64 %x, 63
109105
%lshr = lshr i64 %shl, 19
@@ -115,8 +111,7 @@ define i64 @bfoz_from_lshr_shl_with_msb_zero_i64(i64 %x) {
115111
define i32 @bfoz_from_and_shl_i32(i32 %x) {
116112
; CHECK-LABEL: bfoz_from_and_shl_i32:
117113
; CHECK: # %bb.0:
118-
; CHECK-NEXT: slli a0, a0, 52
119-
; CHECK-NEXT: srli a0, a0, 40
114+
; CHECK-NEXT: nds.bfoz a0, a0, 12, 23
120115
; CHECK-NEXT: ret
121116
%shifted = shl i32 %x, 12
122117
%masked = and i32 %shifted, 16773120
@@ -126,8 +121,7 @@ define i32 @bfoz_from_and_shl_i32(i32 %x) {
126121
define i64 @bfoz_from_and_shl_i64(i64 %x) {
127122
; CHECK-LABEL: bfoz_from_and_shl_i64:
128123
; CHECK: # %bb.0:
129-
; CHECK-NEXT: slli a0, a0, 52
130-
; CHECK-NEXT: srli a0, a0, 28
124+
; CHECK-NEXT: nds.bfoz a0, a0, 24, 35
131125
; CHECK-NEXT: ret
132126
%shifted = shl i64 %x, 24
133127
%masked = and i64 %shifted, 68702699520
@@ -137,8 +131,7 @@ define i64 @bfoz_from_and_shl_i64(i64 %x) {
137131
define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
138132
; CHECK-LABEL: bfoz_from_lshr_shl_i32:
139133
; CHECK: # %bb.0:
140-
; CHECK-NEXT: slli a0, a0, 58
141-
; CHECK-NEXT: srli a0, a0, 39
134+
; CHECK-NEXT: nds.bfoz a0, a0, 19, 24
142135
; CHECK-NEXT: ret
143136
%shl = shl i32 %x, 26
144137
%lshr = lshr i32 %shl, 7
@@ -148,8 +141,7 @@ define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
148141
define i64 @bfoz_from_lshr_shl_i64(i64 %x) {
149142
; CHECK-LABEL: bfoz_from_lshr_shl_i64:
150143
; CHECK: # %bb.0:
151-
; CHECK-NEXT: slli a0, a0, 40
152-
; CHECK-NEXT: srli a0, a0, 15
144+
; CHECK-NEXT: nds.bfoz a0, a0, 25, 48
153145
; CHECK-NEXT: ret
154146
%shl = shl i64 %x, 40
155147
%lshr = lshr i64 %shl, 15

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ define i64 @slliuw(i64 %a) nounwind {
2424
;
2525
; RV64XANDESPERF-LABEL: slliuw:
2626
; RV64XANDESPERF: # %bb.0:
27-
; RV64XANDESPERF-NEXT: slli a0, a0, 32
28-
; RV64XANDESPERF-NEXT: srli a0, a0, 31
27+
; RV64XANDESPERF-NEXT: nds.bfoz a0, a0, 1, 32
2928
; RV64XANDESPERF-NEXT: ret
3029
%conv1 = shl i64 %a, 1
3130
%shl = and i64 %conv1, 8589934590

0 commit comments

Comments
 (0)