@@ -81,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = {
     Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
     Intrinsic::riscv_seg8_store_mask};
 
+static const Intrinsic::ID FixedVsssegIntrIds[] = {
+    Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask,
+    Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask,
+    Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask,
+    Intrinsic::riscv_sseg8_store_mask};
+
 static const Intrinsic::ID ScalableVssegIntrIds[] = {
     Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
     Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
@@ -275,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
 bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
                                                 Value *LaneMask,
                                                 ShuffleVectorInst *SVI,
-                                                unsigned Factor) const {
+                                                unsigned Factor,
+                                                const APInt &GapMask) const {
+  assert(GapMask.getBitWidth() == Factor);
+
+  // We only support cases where the skipped fields are the trailing ones.
+  // TODO: Lower to strided store if there is only a single active field.
+  unsigned MaskFactor = GapMask.popcount();
+  if (MaskFactor < 2 || !GapMask.isMask())
+    return false;
+
   IRBuilder<> Builder(Store);
   const DataLayout &DL = Store->getDataLayout();
   auto Mask = SVI->getShuffleMask();
@@ -287,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
 
   Value *Ptr, *VL;
   Align Alignment;
-  if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
+  if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL,
+                      Alignment))
     return false;
 
   Type *PtrTy = Ptr->getType();
   unsigned AS = PtrTy->getPointerAddressSpace();
-  if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+  if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
     return false;
 
-  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+  Function *SegStoreFunc;
+  if (MaskFactor < Factor)
+    // Strided segmented store.
+    SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+        Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2],
+        {VTy, PtrTy, XLenTy, XLenTy});
+  else
+    // Normal segmented store.
+    SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+        Store->getModule(), FixedVssegIntrIds[Factor - 2],
+        {VTy, PtrTy, XLenTy});
 
   SmallVector<Value *, 10> Ops;
   SmallVector<int, 16> NewShuffleMask;
 
-  for (unsigned i = 0; i < Factor; i++) {
+  for (unsigned i = 0; i < MaskFactor; i++) {
     // Collect shuffle mask for this lane.
     for (unsigned j = 0; j < VTy->getNumElements(); j++)
       NewShuffleMask.push_back(Mask[i + Factor * j]);
@@ -312,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
 
     NewShuffleMask.clear();
   }
-  Ops.append({Ptr, LaneMask, VL});
-  Builder.CreateCall(VssegNFunc, Ops);
+  Ops.push_back(Ptr);
+  if (MaskFactor < Factor) {
+    // Insert the stride argument.
+    unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
+    Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes));
+  }
+  Ops.append({LaneMask, VL});
+  Builder.CreateCall(SegStoreFunc, Ops);
 
   return true;
 }
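
For context (not part of the diff): the strided path above stores only the leading MaskFactor fields of each Factor-wide element group and advances the address by Factor * sizeof(element) between groups, so the skipped trailing fields are left untouched in memory. Below is a minimal standalone C++ sketch of that addressing math, assuming Factor = 4, GapMask = 0b0011 (MaskFactor = 2), i32 elements, and 4 elements per field vector; all names and values are invented for illustration and no LLVM APIs are used.

#include <array>
#include <cstdio>

int main() {
  constexpr unsigned Factor = 4;     // fields per group in the original interleaved layout
  constexpr unsigned MaskFactor = 2; // active fields (GapMask = 0b0011); trailing two are gaps
  constexpr unsigned NumElts = 4;    // elements per de-interleaved field vector (VTy)

  // De-interleaved field vectors, analogous to the shuffles collected in Ops.
  const int Fields[MaskFactor][NumElts] = {{10, 11, 12, 13}, {20, 21, 22, 23}};

  // Destination buffer still laid out with the full Factor; gap fields keep
  // their previous contents (zero here).
  std::array<int, Factor * NumElts> Mem{};

  // Strided segment store: consecutive groups start Factor elements
  // (Factor * sizeof(int) bytes) apart, so the skipped fields are never written.
  for (unsigned j = 0; j < NumElts; ++j)
    for (unsigned f = 0; f < MaskFactor; ++f)
      Mem[j * Factor + f] = Fields[f][j];

  for (int V : Mem)
    std::printf("%d ", V); // 10 20 0 0 11 21 0 0 12 22 0 0 13 23 0 0
  std::printf("\n");
  return 0;
}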