Skip to content

Commit 1f06368

Browse files
committed
Switch to using vlmax form
1 parent d95a0f9 commit 1f06368

File tree

5 files changed

+776
-793
lines changed

5 files changed

+776
-793
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4361,27 +4361,19 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
43614361
if ((LoC >> 31) == HiC)
43624362
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
43634363

4364-
// If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4365-
// we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4366-
// vlmax vsetvli or vsetivli to change the VL.
4367-
// FIXME: Support larger constants?
4368-
// FIXME: Support non-constant VLs by saturating?
4364+
// Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4365+
// VL. This can temporarily increase VL if VL is less than VLMAX.
43694366
if (LoC == HiC) {
43704367
SDValue NewVL;
4371-
if (isAllOnesConstant(VL) ||
4372-
(isa<RegisterSDNode>(VL) &&
4373-
cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4374-
NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4375-
else if (isa<ConstantSDNode>(VL))
4368+
if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
43764369
NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4377-
4378-
if (NewVL) {
4379-
MVT InterVT =
4380-
MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4381-
auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4382-
DAG.getUNDEF(InterVT), Lo, NewVL);
4383-
return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4384-
}
4370+
else
4371+
NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4372+
MVT InterVT =
4373+
MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4374+
auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4375+
DAG.getUNDEF(InterVT), Lo, NewVL);
4376+
return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
43854377
}
43864378
}
43874379

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll

Lines changed: 46 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1725,13 +1725,12 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
17251725
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
17261726
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
17271727
; RV32-NEXT: lui a1, 61681
1728-
; RV32-NEXT: li a2, 32
1729-
; RV32-NEXT: lui a3, 209715
1730-
; RV32-NEXT: lui a4, 349525
1728+
; RV32-NEXT: lui a2, 209715
1729+
; RV32-NEXT: lui a3, 349525
17311730
; RV32-NEXT: addi a1, a1, -241
1732-
; RV32-NEXT: addi a3, a3, 819
1733-
; RV32-NEXT: addi a4, a4, 1365
1734-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
1731+
; RV32-NEXT: addi a2, a2, 819
1732+
; RV32-NEXT: addi a3, a3, 1365
1733+
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
17351734
; RV32-NEXT: vmv.v.x v24, a1
17361735
; RV32-NEXT: csrr a1, vlenb
17371736
; RV32-NEXT: slli a1, a1, 4
@@ -1743,16 +1742,16 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
17431742
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
17441743
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
17451744
; RV32-NEXT: vand.vv v24, v8, v24, v0.t
1746-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
1747-
; RV32-NEXT: vmv.v.x v8, a3
1745+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
1746+
; RV32-NEXT: vmv.v.x v8, a2
17481747
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
17491748
; RV32-NEXT: vsll.vi v24, v24, 4, v0.t
17501749
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
17511750
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
17521751
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
17531752
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
1754-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
1755-
; RV32-NEXT: vmv.v.x v8, a4
1753+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
1754+
; RV32-NEXT: vmv.v.x v8, a3
17561755
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
17571756
; RV32-NEXT: vsll.vi v24, v24, 2, v0.t
17581757
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
@@ -1902,35 +1901,34 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev
19021901
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
19031902
; RV32-NEXT: vor.vv v8, v16, v8
19041903
; RV32-NEXT: lui a1, 61681
1905-
; RV32-NEXT: li a2, 32
1906-
; RV32-NEXT: lui a3, 209715
1907-
; RV32-NEXT: lui a4, 349525
1904+
; RV32-NEXT: lui a2, 209715
1905+
; RV32-NEXT: lui a3, 349525
19081906
; RV32-NEXT: addi a1, a1, -241
1909-
; RV32-NEXT: addi a3, a3, 819
1910-
; RV32-NEXT: addi a4, a4, 1365
1911-
; RV32-NEXT: csrr a5, vlenb
1912-
; RV32-NEXT: slli a5, a5, 3
1913-
; RV32-NEXT: add a5, sp, a5
1914-
; RV32-NEXT: addi a5, a5, 16
1915-
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
1907+
; RV32-NEXT: addi a2, a2, 819
1908+
; RV32-NEXT: addi a3, a3, 1365
1909+
; RV32-NEXT: csrr a4, vlenb
1910+
; RV32-NEXT: slli a4, a4, 3
1911+
; RV32-NEXT: add a4, sp, a4
1912+
; RV32-NEXT: addi a4, a4, 16
1913+
; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
19161914
; RV32-NEXT: vor.vv v16, v16, v24
1917-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
1915+
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
19181916
; RV32-NEXT: vmv.v.x v24, a1
19191917
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
19201918
; RV32-NEXT: vor.vv v8, v16, v8
19211919
; RV32-NEXT: vsrl.vi v16, v8, 4
19221920
; RV32-NEXT: vand.vv v8, v8, v24
19231921
; RV32-NEXT: vand.vv v16, v16, v24
1924-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
1925-
; RV32-NEXT: vmv.v.x v24, a3
1922+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
1923+
; RV32-NEXT: vmv.v.x v24, a2
19261924
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
19271925
; RV32-NEXT: vsll.vi v8, v8, 4
19281926
; RV32-NEXT: vor.vv v8, v16, v8
19291927
; RV32-NEXT: vsrl.vi v16, v8, 2
19301928
; RV32-NEXT: vand.vv v8, v8, v24
19311929
; RV32-NEXT: vand.vv v16, v16, v24
1932-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
1933-
; RV32-NEXT: vmv.v.x v24, a4
1930+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
1931+
; RV32-NEXT: vmv.v.x v24, a3
19341932
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
19351933
; RV32-NEXT: vsll.vi v8, v8, 2
19361934
; RV32-NEXT: vor.vv v8, v16, v8
@@ -2104,13 +2102,12 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
21042102
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
21052103
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
21062104
; RV32-NEXT: lui a1, 61681
2107-
; RV32-NEXT: li a2, 32
2108-
; RV32-NEXT: lui a3, 209715
2109-
; RV32-NEXT: lui a4, 349525
2105+
; RV32-NEXT: lui a2, 209715
2106+
; RV32-NEXT: lui a3, 349525
21102107
; RV32-NEXT: addi a1, a1, -241
2111-
; RV32-NEXT: addi a3, a3, 819
2112-
; RV32-NEXT: addi a4, a4, 1365
2113-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2108+
; RV32-NEXT: addi a2, a2, 819
2109+
; RV32-NEXT: addi a3, a3, 1365
2110+
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
21142111
; RV32-NEXT: vmv.v.x v24, a1
21152112
; RV32-NEXT: csrr a1, vlenb
21162113
; RV32-NEXT: slli a1, a1, 4
@@ -2122,16 +2119,16 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
21222119
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
21232120
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
21242121
; RV32-NEXT: vand.vv v24, v8, v24, v0.t
2125-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2126-
; RV32-NEXT: vmv.v.x v8, a3
2122+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2123+
; RV32-NEXT: vmv.v.x v8, a2
21272124
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
21282125
; RV32-NEXT: vsll.vi v24, v24, 4, v0.t
21292126
; RV32-NEXT: vor.vv v24, v16, v24, v0.t
21302127
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
21312128
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
21322129
; RV32-NEXT: vand.vv v24, v24, v8, v0.t
2133-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2134-
; RV32-NEXT: vmv.v.x v8, a4
2130+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2131+
; RV32-NEXT: vmv.v.x v8, a3
21352132
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
21362133
; RV32-NEXT: vsll.vi v24, v24, 2, v0.t
21372134
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
@@ -2281,35 +2278,34 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev
22812278
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
22822279
; RV32-NEXT: vor.vv v8, v16, v8
22832280
; RV32-NEXT: lui a1, 61681
2284-
; RV32-NEXT: li a2, 32
2285-
; RV32-NEXT: lui a3, 209715
2286-
; RV32-NEXT: lui a4, 349525
2281+
; RV32-NEXT: lui a2, 209715
2282+
; RV32-NEXT: lui a3, 349525
22872283
; RV32-NEXT: addi a1, a1, -241
2288-
; RV32-NEXT: addi a3, a3, 819
2289-
; RV32-NEXT: addi a4, a4, 1365
2290-
; RV32-NEXT: csrr a5, vlenb
2291-
; RV32-NEXT: slli a5, a5, 3
2292-
; RV32-NEXT: add a5, sp, a5
2293-
; RV32-NEXT: addi a5, a5, 16
2294-
; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
2284+
; RV32-NEXT: addi a2, a2, 819
2285+
; RV32-NEXT: addi a3, a3, 1365
2286+
; RV32-NEXT: csrr a4, vlenb
2287+
; RV32-NEXT: slli a4, a4, 3
2288+
; RV32-NEXT: add a4, sp, a4
2289+
; RV32-NEXT: addi a4, a4, 16
2290+
; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
22952291
; RV32-NEXT: vor.vv v16, v16, v24
2296-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2292+
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
22972293
; RV32-NEXT: vmv.v.x v24, a1
22982294
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
22992295
; RV32-NEXT: vor.vv v8, v16, v8
23002296
; RV32-NEXT: vsrl.vi v16, v8, 4
23012297
; RV32-NEXT: vand.vv v8, v8, v24
23022298
; RV32-NEXT: vand.vv v16, v16, v24
2303-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2304-
; RV32-NEXT: vmv.v.x v24, a3
2299+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2300+
; RV32-NEXT: vmv.v.x v24, a2
23052301
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
23062302
; RV32-NEXT: vsll.vi v8, v8, 4
23072303
; RV32-NEXT: vor.vv v8, v16, v8
23082304
; RV32-NEXT: vsrl.vi v16, v8, 2
23092305
; RV32-NEXT: vand.vv v8, v8, v24
23102306
; RV32-NEXT: vand.vv v16, v16, v24
2311-
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2312-
; RV32-NEXT: vmv.v.x v24, a4
2307+
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
2308+
; RV32-NEXT: vmv.v.x v24, a3
23132309
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
23142310
; RV32-NEXT: vsll.vi v8, v8, 2
23152311
; RV32-NEXT: vor.vv v8, v16, v8

0 commit comments

Comments
 (0)