Commit 96e58b8

[RISCV] Legalize misaligned unmasked vp.load/vp.store to vle8/vse8. (#167745)
If vector-unaligned-mem support is not enabled, we should not generate loads/stores that are not aligned to their element size. We already do this for non-VP vector loads/stores.

This code has been in our downstream for about a year and a half, after we found the vectorizer generating misaligned loads/stores. I don't think that problem is unique to our downstream.

Doing this for masked vp.load/vp.store would require widening the mask as well, which is harder to do.

NOTE: Because we have to scale the VL, this will introduce additional vsetvli instructions, and the VL optimizer will not be effective at optimizing any arithmetic that is consumed by the store.
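To make the rewrite concrete, here is its shape sketched at the IR level. The actual legalization runs on SelectionDAG nodes during lowering, so this is an illustrative equivalent rather than the code path itself, and the function names are hypothetical:

; Before: unmasked vp.load of 64-bit elements with only byte alignment known.
declare <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr, <vscale x 1 x i1>, i32)
define <vscale x 1 x i64> @misaligned_load(ptr %p, i32 zeroext %evl) {
  %v = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr align 1 %p, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i64> %v
}

; After (conceptual equivalent): load %evl * 8 bytes as e8, then reinterpret.
declare <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr, <vscale x 8 x i1>, i32)
define <vscale x 1 x i64> @misaligned_load_legalized(ptr %p, i32 zeroext %evl) {
  %bytes = mul i32 %evl, 8           ; scale the explicit vector length
  %raw = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 %p, <vscale x 8 x i1> splat (i1 true), i32 %bytes)
  %v = bitcast <vscale x 8 x i8> %raw to <vscale x 1 x i64>
  ret <vscale x 1 x i64> %v
}

The mul on the explicit vector length is the source of the extra vsetvli mentioned in the NOTE above.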
1 parent 4ab2423 commit 96e58b8

6 files changed: 141 additions, 6 deletions

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 9 additions & 2 deletions
@@ -9913,8 +9913,6 @@ SDValue SelectionDAG::getLoadVP(
     MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
     MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
     const MDNode *Ranges, bool IsExpanding) {
-  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
-
   MMOFlags |= MachineMemOperand::MOLoad;
   assert((MMOFlags & MachineMemOperand::MOStore) == 0);
   // If we don't have a PtrInfo, infer the trivial frame index case to simplify
@@ -9936,6 +9934,11 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
                                 SDValue Offset, SDValue Mask, SDValue EVL,
                                 EVT MemVT, MachineMemOperand *MMO,
                                 bool IsExpanding) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  assert(Mask.getValueType().getVectorElementCount() ==
+             VT.getVectorElementCount() &&
+         "Vector width mismatch between mask and data");
+
   bool Indexed = AM != ISD::UNINDEXED;
   assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");

@@ -10031,6 +10034,10 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
                                  ISD::MemIndexedMode AM, bool IsTruncating,
                                  bool IsCompressing) {
   assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  assert(Mask.getValueType().getVectorElementCount() ==
+             Val.getValueType().getVectorElementCount() &&
+         "Vector width mismatch between mask and data");
+
   bool Indexed = AM != ISD::UNINDEXED;
   assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
   SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
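The asserts added above encode the VP invariant that a mask carries exactly one bit per data element, regardless of element width; a caller that widens the data type of a VP load/store without also widening the mask now fails fast instead of building a malformed node. The same pairing is visible in the IR intrinsics, e.g. (hypothetical function, types chosen for illustration):

declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
define <vscale x 2 x i32> @paired_mask(ptr %p, <vscale x 2 x i1> %m, i32 zeroext %evl) {
  ; <vscale x 2 x i32> data pairs with a <vscale x 2 x i1> mask: equal
  ; element counts, even though each data lane is 32 bits wide.
  %v = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}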

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 101 additions & 2 deletions
@@ -6846,6 +6846,99 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                       Store->getMemOperand()->getFlags());
 }

+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_LOAD via a correctly-aligned type.
+// If the load is already correctly-aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  auto *Load = cast<VPLoadSDNode>(Op);
+  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Load->getMemoryVT(),
+                                     *Load->getMemOperand()))
+    return SDValue();
+
+  SDValue Mask = Load->getMask();
+
+  // FIXME: Handle masked loads somehow.
+  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV load type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+
+  SDValue VL = Load->getVectorLength();
+  VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+                   DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+  MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
+  SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+                            DAG.getAllOnesConstant(DL, MaskVT), VL,
+                            Load->getPointerInfo(), Load->getBaseAlign(),
+                            Load->getMemOperand()->getFlags(), AAMDNodes());
+  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_STORE via a correctly-aligned type.
+// It returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  auto *Store = cast<VPStoreSDNode>(Op);
+  assert(Store && Store->getValue().getValueType().isVector() &&
+         "Expected vector store");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Store->getMemoryVT(),
+                                     *Store->getMemOperand()))
+    return SDValue();
+
+  SDValue Mask = Store->getMask();
+
+  // FIXME: Handle masked stores somehow.
+  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue StoredVal = Store->getValue();
+  MVT VT = StoredVal.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV store type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+
+  SDValue VL = Store->getVectorLength();
+  VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+                   DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+  StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+  LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
+      Store->getBaseAlign());
+
+  MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
+  return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+                        DAG.getUNDEF(Store->getBasePtr().getValueType()),
+                        DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
+                        ISD::UNINDEXED);
+}
+
 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
   assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
@@ -8401,13 +8494,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
     return Op;
   }
-  case ISD::MLOAD:
   case ISD::VP_LOAD:
+    if (SDValue V = expandUnalignedVPLoad(Op, DAG))
+      return V;
+    [[fallthrough]];
+  case ISD::MLOAD:
     return lowerMaskedLoad(Op, DAG);
   case ISD::VP_LOAD_FF:
     return lowerLoadFF(Op, DAG);
-  case ISD::MSTORE:
   case ISD::VP_STORE:
+    if (SDValue V = expandUnalignedVPStore(Op, DAG))
+      return V;
+    [[fallthrough]];
+  case ISD::MSTORE:
     return lowerMaskedStore(Op, DAG);
   case ISD::VECTOR_COMPRESS:
     return lowerVectorCompress(Op, DAG);
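The store path mirrors the load path. A worked instance of the scaling, again sketched as IR for readability (the transform itself builds SelectionDAG nodes, and the function name is illustrative): storing <vscale x 1 x i64> %val with vector length %evl at byte alignment becomes a byte store of %evl * 8 lanes.

declare void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8>, ptr, <vscale x 8 x i1>, i32)
define void @store_as_bytes(<vscale x 1 x i64> %val, ptr %p, i32 zeroext %evl) {
  %bytes = mul i32 %evl, 8                                    ; 8 bytes per i64 lane
  %raw = bitcast <vscale x 1 x i64> %val to <vscale x 8 x i8> ; same bits, e8 lanes
  call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %raw, ptr align 1 %p, <vscale x 8 x i1> splat (i1 true), i32 %bytes)
  ret void
}

The all-ones mask is what makes this rewrite cheap: a genuine lane mask would have to be widened to one bit per byte, which is why both expand functions bail out on anything but a constant all-ones splat (see the FIXMEs above).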

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 0 deletions
@@ -576,6 +576,9 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;

+  SDValue expandUnalignedVPLoad(SDValue Op, SelectionDAG &DAG) const;
+  SDValue expandUnalignedVPStore(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll

Lines changed: 2 additions & 2 deletions
@@ -180,8 +180,8 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 252
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsetivli zero, 12, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
 ; CHECK-NEXT:    vmv.v.i v0, 1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v10, v10, 1, v0.t
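The element-to-byte scaling is visible in the updated checks: the old code read 3 e32 elements, the new code reads the same 12 bytes (3 lanes x 4 bytes) as e8.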

llvm/test/CodeGen/RISCV/rvv/vpload.ll

Lines changed: 13 additions & 0 deletions
@@ -612,6 +612,19 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
   ret <vscale x 16 x double> %lo
 }

+define <vscale x 1 x i64> @unaligned_vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpload_nxv1i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(<vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret <vscale x 1 x i64> %load
+}
+
 define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
 ; CHECK-LABEL: vpload_all_active_nxv8i8:
 ; CHECK:       # %bb.0:
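The slli a1, a1, 3 in the expected output is the VL scaling from the lowering: %evl i64 elements become %evl * 8 bytes before the vsetvli/vle8.v pair. The vpstore.ll test below checks the mirrored vse8.v sequence.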

llvm/test/CodeGen/RISCV/rvv/vpstore.ll

Lines changed: 13 additions & 0 deletions
@@ -511,6 +511,19 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
   ret void
 }

+define void @unaligned_vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpstore_nxv1i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, <vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret void
+}
+
 define void @vpstore_all_active_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr) {
 ; CHECK-LABEL: vpstore_all_active_nxv8i8:
 ; CHECK:       # %bb.0:
