@@ -266,22 +266,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
266266// /
267267// / Note that the new shufflevectors will be removed and we'll only generate one
268268// / vsseg3 instruction in CodeGen.
269- bool  RISCVTargetLowering::lowerInterleavedStore (StoreInst *SI,
269+ bool  RISCVTargetLowering::lowerInterleavedStore (Instruction *Store,
270+                                                 Value *LaneMask,
270271                                                ShuffleVectorInst *SVI,
271272                                                unsigned  Factor) const  {
272-   IRBuilder<> Builder (SI );
273-   const  DataLayout &DL = SI ->getDataLayout ();
273+   IRBuilder<> Builder (Store );
274+   const  DataLayout &DL = Store ->getDataLayout ();
274275  auto  Mask = SVI->getShuffleMask ();
275276  auto  *ShuffleVTy = cast<FixedVectorType>(SVI->getType ());
276277  //  Given SVI : <n*factor x ty>, then VTy : <n x ty>
277278  auto  *VTy = FixedVectorType::get (ShuffleVTy->getElementType (),
278279                                   ShuffleVTy->getNumElements () / Factor);
279-   if  (!isLegalInterleavedAccessType (VTy, Factor, SI->getAlign (),
280-                                     SI->getPointerAddressSpace (), DL))
280+   auto  *XLenTy = Type::getIntNTy (Store->getContext (), Subtarget.getXLen ());
281+ 
282+   Value *Ptr, *VL;
283+   Align Alignment;
284+   if  (!getMemOperands (Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
281285    return  false ;
282286
283-   auto  *PtrTy = SI->getPointerOperandType ();
284-   auto  *XLenTy = Type::getIntNTy (SI->getContext (), Subtarget.getXLen ());
287+   Type *PtrTy = Ptr->getType ();
288+   unsigned  AS = PtrTy->getPointerAddressSpace ();
289+   if  (!isLegalInterleavedAccessType (VTy, Factor, Alignment, AS, DL))
290+     return  false ;
285291
286292  unsigned  Index;
287293  //  If the segment store only has one active lane (i.e. the interleave is
@@ -292,27 +298,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
292298    unsigned  ScalarSizeInBytes =
293299        DL.getTypeStoreSize (ShuffleVTy->getElementType ());
294300    Value *Data = SVI->getOperand (0 );
295-     auto  *DataVTy = cast<FixedVectorType>( Data-> getType ( ));
301+     Data = Builder. CreateExtractVector (VTy,  Data,  uint64_t ( 0 ));
296302    Value *Stride = ConstantInt::get (XLenTy, Factor * ScalarSizeInBytes);
297303    Value *Offset = ConstantInt::get (XLenTy, Index * ScalarSizeInBytes);
298-     Value *BasePtr = Builder.CreatePtrAdd (SI->getPointerOperand (), Offset);
299-     Value *Mask = Builder.getAllOnesMask (DataVTy->getElementCount ());
300-     Value *VL = Builder.CreateElementCount (Builder.getInt32Ty (),
301-                                            VTy->getElementCount ());
302- 
303-     CallInst *CI = Builder.CreateIntrinsic (
304-         Intrinsic::experimental_vp_strided_store,
305-         {Data->getType (), BasePtr->getType (), Stride->getType ()},
306-         {Data, BasePtr, Stride, Mask, VL});
307-     Align Alignment = commonAlignment (SI->getAlign (), Index * ScalarSizeInBytes);
308-     CI->addParamAttr (
309-         1 , Attribute::getWithAlignment (CI->getContext (), Alignment));
304+     Value *BasePtr = Builder.CreatePtrAdd (Ptr, Offset);
305+     //  Note: Same VL as above, but i32 not xlen due to signature of
306+     //  vp.strided.store
307+     VL = Builder.CreateElementCount (Builder.getInt32Ty (),
308+                                     VTy->getElementCount ());
310309
310+     CallInst *CI =
311+         Builder.CreateIntrinsic (Intrinsic::experimental_vp_strided_store,
312+                                 {VTy, BasePtr->getType (), Stride->getType ()},
313+                                 {Data, BasePtr, Stride, LaneMask, VL});
314+     Alignment = commonAlignment (Alignment, Index * ScalarSizeInBytes);
315+     CI->addParamAttr (1 ,
316+                      Attribute::getWithAlignment (CI->getContext (), Alignment));
311317    return  true ;
312318  }
313319
314320  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration (
315-       SI ->getModule (), FixedVssegIntrIds[Factor - 2 ], {VTy, PtrTy, XLenTy});
321+       Store ->getModule (), FixedVssegIntrIds[Factor - 2 ], {VTy, PtrTy, XLenTy});
316322
317323  SmallVector<Value *, 10 > Ops;
318324  SmallVector<int , 16 > NewShuffleMask;
@@ -328,13 +334,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
328334
329335    NewShuffleMask.clear ();
330336  }
331-   //  This VL should be OK (should be executable in one vsseg instruction,
332-   //  potentially under larger LMULs) because we checked that the fixed vector
333-   //  type fits in isLegalInterleavedAccessType
334-   Value *VL = Builder.CreateElementCount (XLenTy, VTy->getElementCount ());
335-   Value *StoreMask = Builder.getAllOnesMask (VTy->getElementCount ());
336-   Ops.append ({SI->getPointerOperand (), StoreMask, VL});
337- 
337+   Ops.append ({Ptr, LaneMask, VL});
338338  Builder.CreateCall (VssegNFunc, Ops);
339339
340340  return  true ;
@@ -457,91 +457,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
457457  Builder.CreateCall (VssegNFunc, Operands);
458458  return  true ;
459459}
460- 
461- // / Lower an interleaved vp.store into a vssegN intrinsic.
462- // / Returns false if the access cannot be lowered, true once vssegN is emitted.
463- // / E.g. Lower an interleaved vp.store (Factor = 2):
464- // /
465- // /   %is = tail call <vscale x 64 x i8>
466- // /             @llvm.vector.interleave2.nxv64i8(
467- // /                               <vscale x 32 x i8> %load0,
468- // /                               <vscale x 32 x i8> %load1)
469- // /   %wide.rvl = shl nuw nsw i32 %rvl, 1
470- // /   tail call void @llvm.vp.store.nxv64i8.p0(
471- // /                               <vscale x 64 x i8> %is, ptr %ptr,
472- // /                               %mask,
473- // /                               i32 %wide.rvl)
474- // /
475- // / Into:
476- // /   call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
477- // /                               <vscale x 32 x i8> %load0,
478- // /                               <vscale x 32 x i8> %load1, ptr %ptr,
479- // /                               %mask,
480- // /                               i64 %rvl)
481- bool  RISCVTargetLowering::lowerInterleavedVPStore (
482-     VPIntrinsic *Store, Value *Mask,
483-     ArrayRef<Value *> InterleaveOperands) const  {
484-   assert (Mask && " Expect a valid mask"  );
485-   assert (Store->getIntrinsicID () == Intrinsic::vp_store &&
486-          " Unexpected intrinsic"  );
487- 
488-   const  unsigned  Factor = InterleaveOperands.size ();
489- 
490-   auto  *VTy = dyn_cast<VectorType>(InterleaveOperands[0 ]->getType ());
491-   if  (!VTy)
492-     return  false ;
493- 
494-   const  DataLayout &DL = Store->getDataLayout ();
495-   Align Alignment = Store->getParamAlign (1 ).value_or (
496-       DL.getABITypeAlign (VTy->getElementType ()));
497-   if  (!isLegalInterleavedAccessType (
498-           VTy, Factor, Alignment,
499-           Store->getArgOperand (1 )->getType ()->getPointerAddressSpace (), DL))
500-     return  false ;
501- 
502-   IRBuilder<> Builder (Store);
503-   Value *WideEVL = Store->getArgOperand (3 );
504-   //  Conservatively check that the wide EVL is a multiple of Factor; otherwise
505-   //  some (trailing) elements might be lost once it is divided by Factor below.
506-   if  (!isMultipleOfN (WideEVL, Store->getDataLayout (), Factor))
507-     return  false ;
508- 
509-   auto  *PtrTy = Store->getArgOperand (1 )->getType ();
510-   auto  *XLenTy = Type::getIntNTy (Store->getContext (), Subtarget.getXLen ());
511-   auto  *FactorC = ConstantInt::get (WideEVL->getType (), Factor);
512-   Value *EVL =
513-       Builder.CreateZExt (Builder.CreateExactUDiv (WideEVL, FactorC), XLenTy);
514- 
515-   if  (isa<FixedVectorType>(VTy)) {
516-     SmallVector<Value *, 8 > Operands (InterleaveOperands);
517-     Operands.append ({Store->getArgOperand (1 ), Mask, EVL});
518-     Builder.CreateIntrinsic (FixedVssegIntrIds[Factor - 2 ],
519-                             {VTy, PtrTy, XLenTy}, Operands);
520-     return  true ;
521-   }
522- 
523-   unsigned  SEW = DL.getTypeSizeInBits (VTy->getElementType ());
524-   unsigned  NumElts = VTy->getElementCount ().getKnownMinValue ();
525-   Type *VecTupTy = TargetExtType::get (
526-       Store->getContext (), " riscv.vector.tuple"  ,
527-       ScalableVectorType::get (Type::getInt8Ty (Store->getContext ()),
528-                               NumElts * SEW / 8 ),
529-       Factor);
530- 
531-   Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration (
532-       Store->getModule (), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
533-   Value *StoredVal = PoisonValue::get (VecTupTy);
534-   for  (unsigned  i = 0 ; i < Factor; ++i)
535-     StoredVal = Builder.CreateCall (
536-         VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32 (i)});
537- 
538-   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration (
539-       Store->getModule (), ScalableVssegIntrIds[Factor - 2 ],
540-       {VecTupTy, PtrTy, Mask->getType (), EVL->getType ()});
541- 
542-   Value *Operands[] = {StoredVal, Store->getArgOperand (1 ), Mask, EVL,
543-                        ConstantInt::get (XLenTy, Log2_64 (SEW))};
544- 
545-   Builder.CreateCall (VssegNFunc, Operands);
546-   return  true ;
547- }
0 commit comments