Commit ee0fd27
committed
1 parent 746a86a

Revert "Split LMUL 8 case manually"

This reverts commit 661f7ac8c8bb0e1fa4e50632f30aef3b3b7d0ee1.

3 files changed: +7, -188 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 93 deletions
@@ -11096,95 +11096,6 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
       Store->getMemoryVT(), Store->getMemOperand());
 }
 
-SDValue RISCVTargetLowering::splitMaskedExpandingLoad(SDValue Op,
-                                                      SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  MVT VT = Op.getSimpleValueType();
-  auto *MLD = cast<MaskedLoadSDNode>(Op);
-  MVT XLenVT = Subtarget.getXLenVT();
-  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
-  SDValue Chain = MLD->getChain();
-  SDValue Ptr = MLD->getBasePtr();
-  SDValue Offset = MLD->getOffset();
-  SDValue Mask = MLD->getMask();
-  SDValue Passthru = MLD->getPassThru();
-  Align Alignment = MLD->getOriginalAlign();
-  ISD::LoadExtType ExtType = MLD->getExtensionType();
-
-  // Split Mask operand
-  auto [MaskLo, MaskHi] = DAG.SplitVector(Mask, DL);
-
-  EVT MemoryVT = MLD->getMemoryVT();
-  bool HiIsEmpty = false;
-  auto [LoMemVT, HiMemVT] =
-      DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
-
-  // Split PassThru operand
-  auto [PassthruLo, PassthruHi] = DAG.SplitVector(Passthru, DL);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MLD->getPointerInfo(), MachineMemOperand::MOLoad,
-      LocationSize::beforeOrAfterPointer(), Alignment, MLD->getAAInfo(),
-      MLD->getRanges());
-
-  SDValue Lo, Hi;
-  Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, Offset, MaskLo, PassthruLo,
-                         LoMemVT, MMO, MLD->getAddressingMode(), ExtType,
-                         /*IsExpanding=*/true);
-
-  if (HiIsEmpty) {
-    // The hi masked load has zero storage size. We therefore simply set it to
-    // the low masked load and rely on subsequent removal from the chain.
-    Hi = Lo;
-  } else {
-    EVT MaskVT = MaskLo.getValueType();
-    SDValue VL = DAG.getConstant(MaskVT.getVectorNumElements(), DL, XLenVT);
-
-    MVT MaskContainerVT =
-        getContainerForFixedLengthVector(MaskVT.getSimpleVT());
-    MaskLo = convertToScalableVector(MaskContainerVT, MaskLo, DAG, Subtarget);
-
-    SDValue Increment = DAG.getNode(
-        RISCVISD::VCPOP_VL, DL, XLenVT, MaskLo,
-        getAllOnesMask(MaskLo.getSimpleValueType(), VL, DL, DAG), VL);
-
-    // Scale is an element size in bytes.
-    SDValue Scale =
-        DAG.getConstant(LoMemVT.getScalarSizeInBits() / 8, DL, XLenVT);
-    Increment = DAG.getNode(ISD::MUL, DL, XLenVT, Increment, Scale);
-
-    Ptr = DAG.getNode(ISD::ADD, DL, XLenVT, Ptr, Increment);
-
-    MachinePointerInfo MPI;
-    if (LoMemVT.isScalableVector())
-      MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
-    else
-      MPI = MLD->getPointerInfo().getWithOffset(
-          LoMemVT.getStoreSize().getFixedValue());
-
-    MMO = DAG.getMachineFunction().getMachineMemOperand(
-        MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(),
-        Alignment, MLD->getAAInfo(), MLD->getRanges());
-
-    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, Offset, MaskHi, PassthruHi,
-                           HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
-                           /*IsExpanding=*/true);
-  }
-
-  // Build a factor node to remember that this load is independent of the
-  // other one.
-  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
-                      Hi.getValue(1));
-
-  // Legalize the chain result - switch anything that used the old chain to
-  // use the new one.
-  DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
-
-  return DAG.getMergeValues(
-      {DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi), Chain}, DL);
-}
-
 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
                                              SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -11240,10 +11151,9 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
   // should change the element type of index vector to i16 to avoid overflow.
   if (IndexEltVT == MVT::i8 &&
       VT.getVectorElementCount().getKnownMinValue() > 256) {
-    // If this will result in illegal types, we split it into two loads.
-    if (getLMUL(IndexVT) == RISCVII::LMUL_8)
-      return splitMaskedExpandingLoad(Op, DAG);
-
+    // FIXME: Don't know how to make LMUL==8 case legal.
+    assert(getLMUL(IndexVT) != RISCVII::LMUL_8 &&
+           "We don't know how to lower LMUL==8 case");
     IndexVT = IndexVT.changeVectorElementType(MVT::i16);
   }
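
For context on why the split is replaced by an assert rather than the i16 widening: under "+zvl1024b" the i8 index vector for a <1024 x i8> expanding load already occupies a full LMUL-8 register group, and LMUL 8 is the largest group RVV defines, so widening the index element type to i16 (needed because an i8 index can only address 256 elements) has no legal type to land in. A minimal standalone sketch of that arithmetic follows; this is illustrative plain C++, not LLVM code, and lmulFor and its constants are invented for the example.

#include <cassert>
#include <cstdio>

// Illustrative only: an RVV register group spans LMUL vector registers,
// where LMUL = (NumElts * EltBits) / VLEN and the largest legal LMUL is 8.
constexpr int lmulFor(int NumElts, int EltBits, int Vlen) {
  return NumElts * EltBits / Vlen;
}

int main() {
  constexpr int Vlen = 1024; // "+zvl1024b" guarantees VLEN >= 1024.

  // <1024 x i8> indices already fill a whole LMUL-8 register group...
  static_assert(lmulFor(1024, 8, Vlen) == 8, "i8 indices are LMUL 8");

  // ...so widening the index element type to i16 would require LMUL 16,
  // which does not exist. This is the case the reverted code split manually
  // and the new assert rejects.
  std::printf("i16 indices would need LMUL %d\n", lmulFor(1024, 16, Vlen));
  return 0;
}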

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 0 additions & 2 deletions
@@ -995,8 +995,6 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
 
-  SDValue splitMaskedExpandingLoad(SDValue Op, SelectionDAG &DAG) const;
-
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
       const SmallVector<CCValAssign, 16> &ArgLocs) const;

llvm/test/CodeGen/RISCV/rvv/expandload.ll

Lines changed: 4 additions & 93 deletions
@@ -1215,99 +1215,10 @@ define <512 x i8> @test_expandload_v512i8(ptr %base, <512 x i1> %mask, <512 x i8
 }
 
 ; FIXME: Don't know how to make it legal.
-define <1024 x i8> @test_expandload_v1024i8(ptr %base, <1024 x i1> %mask, <1024 x i8> %passthru) "target-features"="+zvl1024b" {
-; CHECK-RV32-LABEL: test_expandload_v1024i8:
-; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    addi sp, sp, -16
-; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT:    csrr a1, vlenb
-; CHECK-RV32-NEXT:    slli a1, a1, 3
-; CHECK-RV32-NEXT:    sub sp, sp, a1
-; CHECK-RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-RV32-NEXT:    vmv1r.v v7, v0
-; CHECK-RV32-NEXT:    li a1, 512
-; CHECK-RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-RV32-NEXT:    vcpop.m a2, v0
-; CHECK-RV32-NEXT:    add a2, a0, a2
-; CHECK-RV32-NEXT:    li a3, 64
-; CHECK-RV32-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
-; CHECK-RV32-NEXT:    vslidedown.vx v0, v0, a3
-; CHECK-RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-RV32-NEXT:    viota.m v16, v0
-; CHECK-RV32-NEXT:    addi a3, sp, 16
-; CHECK-RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; CHECK-RV32-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-RV32-NEXT:    vslidedown.vx v24, v8, a1
-; CHECK-RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
-; CHECK-RV32-NEXT:    vluxei16.v v24, (a2), v16, v0.t
-; CHECK-RV32-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; CHECK-RV32-NEXT:    viota.m v16, v7
-; CHECK-RV32-NEXT:    vmv1r.v v0, v7
-; CHECK-RV32-NEXT:    vsetvli zero, zero, e8, m4, ta, mu
-; CHECK-RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
-; CHECK-RV32-NEXT:    li a0, 1024
-; CHECK-RV32-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-RV32-NEXT:    vslideup.vx v8, v24, a1
-; CHECK-RV32-NEXT:    csrr a0, vlenb
-; CHECK-RV32-NEXT:    slli a0, a0, 3
-; CHECK-RV32-NEXT:    add sp, sp, a0
-; CHECK-RV32-NEXT:    addi sp, sp, 16
-; CHECK-RV32-NEXT:    ret
-;
-; CHECK-RV64-LABEL: test_expandload_v1024i8:
-; CHECK-RV64:       # %bb.0:
-; CHECK-RV64-NEXT:    addi sp, sp, -16
-; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-RV64-NEXT:    csrr a1, vlenb
-; CHECK-RV64-NEXT:    slli a1, a1, 4
-; CHECK-RV64-NEXT:    sub sp, sp, a1
-; CHECK-RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-RV64-NEXT:    vmv1r.v v7, v0
-; CHECK-RV64-NEXT:    li a1, 512
-; CHECK-RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-RV64-NEXT:    vcpop.m a2, v0
-; CHECK-RV64-NEXT:    add a2, a0, a2
-; CHECK-RV64-NEXT:    li a3, 64
-; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
-; CHECK-RV64-NEXT:    vslidedown.vx v0, v0, a3
-; CHECK-RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-RV64-NEXT:    viota.m v24, v0
-; CHECK-RV64-NEXT:    csrr a3, vlenb
-; CHECK-RV64-NEXT:    slli a3, a3, 3
-; CHECK-RV64-NEXT:    add a3, sp, a3
-; CHECK-RV64-NEXT:    addi a3, a3, 16
-; CHECK-RV64-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
-; CHECK-RV64-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-RV64-NEXT:    vslidedown.vx v16, v8, a1
-; CHECK-RV64-NEXT:    addi a3, sp, 16
-; CHECK-RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; CHECK-RV64-NEXT:    csrr a3, vlenb
-; CHECK-RV64-NEXT:    slli a3, a3, 3
-; CHECK-RV64-NEXT:    add a3, sp, a3
-; CHECK-RV64-NEXT:    addi a3, a3, 16
-; CHECK-RV64-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT:    addi a3, sp, 16
-; CHECK-RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
-; CHECK-RV64-NEXT:    vluxei16.v v16, (a2), v24, v0.t
-; CHECK-RV64-NEXT:    vmv.v.v v24, v16
-; CHECK-RV64-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; CHECK-RV64-NEXT:    viota.m v16, v7
-; CHECK-RV64-NEXT:    vmv1r.v v0, v7
-; CHECK-RV64-NEXT:    vsetvli zero, zero, e8, m4, ta, mu
-; CHECK-RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
-; CHECK-RV64-NEXT:    li a0, 1024
-; CHECK-RV64-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-RV64-NEXT:    vslideup.vx v8, v24, a1
-; CHECK-RV64-NEXT:    csrr a0, vlenb
-; CHECK-RV64-NEXT:    slli a0, a0, 4
-; CHECK-RV64-NEXT:    add sp, sp, a0
-; CHECK-RV64-NEXT:    addi sp, sp, 16
-; CHECK-RV64-NEXT:    ret
-  %res = call <1024 x i8> @llvm.masked.expandload.v1024i8(ptr align 1 %base, <1024 x i1> %mask, <1024 x i8> %passthru)
-  ret <1024 x i8> %res
-}
+; define <1024 x i8> @test_expandload_v1024i8(ptr %base, <1024 x i1> %mask, <1024 x i8> %passthru) "target-features"="+zvl1024b" {
+;   %res = call <1024 x i8> @llvm.masked.expandload.v1024i8(ptr align 1 %base, <1024 x i1> %mask, <1024 x i8> %passthru)
+;   ret <1024 x i8> %res
+; }
 
 declare <512 x i8> @llvm.masked.expandload.v512i8(ptr, <512 x i1>, <512 x i8>)
 declare <1024 x i8> @llvm.masked.expandload.v1024i8(ptr, <1024 x i1>, <1024 x i8>)
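
The reverted split relied on expandload's sequential-memory semantics: active lanes consume consecutive bytes, so the high half of a split vector must read from the base pointer advanced by the population count of the low half's mask, which is exactly what the deleted VCPOP_VL / MUL / ADD sequence computed. Below is a scalar model of that idea; illustrative plain C++ with invented names (expandLoad), not the IR intrinsic or the generated RVV code.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of llvm.masked.expandload: active lanes consume consecutive
// memory; inactive lanes keep their passthru value.
std::vector<uint8_t> expandLoad(const uint8_t *Base,
                                const std::vector<bool> &Mask,
                                std::vector<uint8_t> Passthru) {
  size_t Consumed = 0;
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I])
      Passthru[I] = Base[Consumed++];
  return Passthru;
}

int main() {
  const uint8_t Mem[] = {10, 20, 30};
  const std::vector<bool> Mask = {true, false, true, true};
  const std::vector<uint8_t> Pass = {0, 0, 0, 0};

  // Whole-vector result: {10, 0, 20, 30}.
  auto Whole = expandLoad(Mem, Mask, Pass);

  // Split in half: the high half's base pointer is advanced by
  // popcount(MaskLo) elements, mirroring the deleted VCPOP_VL sequence.
  std::vector<bool> MaskLo(Mask.begin(), Mask.begin() + 2);
  std::vector<bool> MaskHi(Mask.begin() + 2, Mask.end());
  size_t LoPop = std::count(MaskLo.begin(), MaskLo.end(), true);

  std::vector<uint8_t> PassLo(Pass.begin(), Pass.begin() + 2);
  std::vector<uint8_t> PassHi(Pass.begin() + 2, Pass.end());
  auto Lo = expandLoad(Mem, MaskLo, PassLo);
  auto Hi = expandLoad(Mem + LoPop, MaskHi, PassHi);

  // Concatenating Lo and Hi reproduces Whole; exit 0 on success.
  return !(std::equal(Lo.begin(), Lo.end(), Whole.begin()) &&
           std::equal(Hi.begin(), Hi.end(), Whole.begin() + 2));
}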
