
Commit 2188f21

[RISCV] Legalize misaligned unmasked vp.load/vp.store to vle8/vse8.
If vector-unaligned-mem support is not enabled, we should not generate loads/stores that are not aligned to their element size. We already handle this for non-VP vector loads/stores. This code has been in our downstream for about a year and a half, after we found the vectorizer generating misaligned loads/stores. I don't think that is unique to our downstream, but I'm not sure.
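
To make the transform concrete, here is a minimal IR-level sketch (hypothetical function and value names; the rewrite itself runs on SelectionDAG nodes during lowering, not on IR). A misaligned e32 vp.load becomes an equivalently-sized e8 vp.load whose EVL is scaled by the element size in bytes, and the result is bitcast back:

declare <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr, <vscale x 8 x i1>, i32)

define <vscale x 2 x i32> @sketch_unaligned_vpload(ptr %p, i32 zeroext %evl) {
  ; All-ones mask, built the same way as in the tests below.
  %m0 = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 8 x i1> %m0, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  ; Load the same bytes as e8, with the EVL scaled by 4 (= 32 / 8)...
  %evlb = mul i32 %evl, 4
  %bytes = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 %p, <vscale x 8 x i1> %m, i32 %evlb)
  ; ...then reinterpret the loaded bytes as the original element type.
  %v = bitcast <vscale x 8 x i8> %bytes to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %v
}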
1 parent 87da620

5 files changed: +130 −4

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 99 additions & 2 deletions
@@ -6853,6 +6853,97 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
                       Store->getMemOperand()->getFlags());
 }
 
+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_LOAD via a correctly-aligned type.
+// If the load is already correctly aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  auto *Load = cast<VPLoadSDNode>(Op);
+  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Load->getMemoryVT(),
+                                     *Load->getMemOperand()))
+    return SDValue();
+
+  SDValue Mask = Load->getMask();
+
+  // FIXME: Handle masked loads somehow.
+  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV load type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+
+  SDValue VL = Load->getVectorLength();
+  VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+                   DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+  SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+                            DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
+                            Load->getPointerInfo(), Load->getBaseAlign(),
+                            Load->getMemOperand()->getFlags(), AAMDNodes());
+  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::VP_STORE via a correctly-aligned type.
+// It returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  auto *Store = cast<VPStoreSDNode>(Op);
+  assert(Store && Store->getValue().getValueType().isVector() &&
+         "Expected vector store");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Store->getMemoryVT(),
+                                     *Store->getMemOperand()))
+    return SDValue();
+
+  SDValue Mask = Store->getMask();
+
+  // FIXME: Handle masked stores somehow.
+  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue StoredVal = Store->getValue();
+  MVT VT = StoredVal.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV store type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+
+  SDValue VL = Store->getVectorLength();
+  VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
+                   DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
+
+  StoredVal = DAG.getBitcast(NewVT, StoredVal);
+
+  LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
+      Store->getBaseAlign());
+
+  return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+                        DAG.getUNDEF(Store->getBasePtr().getValueType()),
+                        DAG.getAllOnesConstant(DL, Mask.getValueType()), VL,
+                        NewVT, MMO, ISD::UNINDEXED);
+}
+
 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
   assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
@@ -8408,13 +8499,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
     return Op;
   }
-  case ISD::MLOAD:
   case ISD::VP_LOAD:
+    if (SDValue V = expandUnalignedVPLoad(Op, DAG))
+      return V;
+    [[fallthrough]];
+  case ISD::MLOAD:
     return lowerMaskedLoad(Op, DAG);
   case ISD::VP_LOAD_FF:
     return lowerLoadFF(Op, DAG);
-  case ISD::MSTORE:
   case ISD::VP_STORE:
+    if (SDValue V = expandUnalignedVPStore(Op, DAG))
+      return V;
+    [[fallthrough]];
+  case ISD::MSTORE:
     return lowerMaskedStore(Op, DAG);
   case ISD::VECTOR_COMPRESS:
     return lowerVectorCompress(Op, DAG);
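
The store path is symmetric. As a hedged IR-level sketch of what expandUnalignedVPStore produces for e64 elements (hypothetical names; the DAG code additionally rebuilds the MachineMemOperand with the byte vector's store size so the memory info stays precise):

declare void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8>, ptr, <vscale x 8 x i1>, i32)

define void @sketch_unaligned_vpstore(<vscale x 1 x i64> %val, ptr %p, i32 zeroext %evl) {
  ; All-ones mask over the byte-typed vector.
  %m0 = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 8 x i1> %m0, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  ; Reinterpret the value as bytes and scale the EVL by 8 (= 64 / 8); the
  ; backend materializes this scaling as "slli a1, a1, 3" in the tests below.
  %bytes = bitcast <vscale x 1 x i64> %val to <vscale x 8 x i8>
  %evlb = mul i32 %evl, 8
  call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %bytes, ptr align 1 %p, <vscale x 8 x i1> %m, i32 %evlb)
  ret void
}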

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 0 deletions
@@ -576,6 +576,9 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue expandUnalignedVPLoad(SDValue Op, SelectionDAG &DAG) const;
+  SDValue expandUnalignedVPStore(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll

Lines changed: 2 additions & 2 deletions
@@ -180,8 +180,8 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, a0, 252
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vsetivli zero, 12, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
 ; CHECK-NEXT:    vmv.v.i v0, 1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v10, v10, 1, v0.t

llvm/test/CodeGen/RISCV/rvv/vpload.ll

Lines changed: 13 additions & 0 deletions
@@ -612,6 +612,19 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
   ret <vscale x 16 x double> %lo
 }
 
+define <vscale x 1 x i64> @unaligned_vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpload_nxv1i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(<vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret <vscale x 1 x i64> %load
+}
+
 define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
 ; CHECK-LABEL: vpload_all_active_nxv8i8:
 ; CHECK:       # %bb.0:

llvm/test/CodeGen/RISCV/rvv/vpstore.ll

Lines changed: 13 additions & 0 deletions
@@ -511,6 +511,19 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
   ret void
 }
 
+define void @unaligned_vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: unaligned_vpstore_nxv1i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, <vscale x 1 x i64>* align 1 %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret void
+}
+
 define void @vpstore_all_active_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr) {
 ; CHECK-LABEL: vpstore_all_active_nxv8i8:
 ; CHECK:       # %bb.0:
