@@ -266,22 +266,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
 ///
 /// Note that the new shufflevectors will be removed and we'll only generate one
 /// vsseg3 instruction in CodeGen.
-bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
+                                                Value *LaneMask,
                                                 ShuffleVectorInst *SVI,
                                                 unsigned Factor) const {
-  IRBuilder<> Builder(SI);
-  const DataLayout &DL = SI->getDataLayout();
+  IRBuilder<> Builder(Store);
+  const DataLayout &DL = Store->getDataLayout();
   auto Mask = SVI->getShuffleMask();
   auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                    ShuffleVTy->getNumElements() / Factor);
-  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
-                                    SI->getPointerAddressSpace(), DL))
+  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
+
+  Value *Ptr, *VL;
+  Align Alignment;
+  if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
     return false;
 
-  auto *PtrTy = SI->getPointerOperandType();
-  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+  Type *PtrTy = Ptr->getType();
+  unsigned AS = PtrTy->getPointerAddressSpace();
+  if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+    return false;
 
   unsigned Index;
   // If the segment store only has one active lane (i.e. the interleave is
@@ -292,27 +298,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
     unsigned ScalarSizeInBytes =
         DL.getTypeStoreSize(ShuffleVTy->getElementType());
     Value *Data = SVI->getOperand(0);
-    auto *DataVTy = cast<FixedVectorType>(Data->getType());
+    Data = Builder.CreateExtractVector(VTy, Data, uint64_t(0));
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
     Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
-    Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
-    Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
-    Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
-                                           VTy->getElementCount());
-
-    CallInst *CI = Builder.CreateIntrinsic(
-        Intrinsic::experimental_vp_strided_store,
-        {Data->getType(), BasePtr->getType(), Stride->getType()},
-        {Data, BasePtr, Stride, Mask, VL});
-    Align Alignment = commonAlignment(SI->getAlign(), Index * ScalarSizeInBytes);
-    CI->addParamAttr(
-        1, Attribute::getWithAlignment(CI->getContext(), Alignment));
+    Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+    // Note: Same VL as above, but i32 not xlen due to signature of
+    // vp.strided.store
+    VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+                                    VTy->getElementCount());
 
+    CallInst *CI =
+        Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                {VTy, BasePtr->getType(), Stride->getType()},
+                                {Data, BasePtr, Stride, LaneMask, VL});
+    Alignment = commonAlignment(Alignment, Index * ScalarSizeInBytes);
+    CI->addParamAttr(1,
+                     Attribute::getWithAlignment(CI->getContext(), Alignment));
     return true;
   }
 
   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+      Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
 
   SmallVector<Value *, 10> Ops;
   SmallVector<int, 16> NewShuffleMask;
@@ -328,13 +334,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
 
     NewShuffleMask.clear();
   }
-  // This VL should be OK (should be executable in one vsseg instruction,
-  // potentially under larger LMULs) because we checked that the fixed vector
-  // type fits in isLegalInterleavedAccessType
-  Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
-  Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
-  Ops.append({SI->getPointerOperand(), StoreMask, VL});
-
+  Ops.append({Ptr, LaneMask, VL});
   Builder.CreateCall(VssegNFunc, Ops);
 
   return true;
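
As a concrete illustration of the single-active-lane path in the hunk above, here is a minimal IR sketch, assuming a factor-3 interleave of <4 x i32> lanes where only lane 1 carries data; the %wide, %ptr and %lanemask names, the element type, and the <8 x i32> shuffle-operand width are illustrative, not taken from the patch. The active lane's data is extracted from the shuffle operand, the base pointer is advanced by Index * element-size, and a single vp.strided.store with stride Factor * element-size replaces the shuffle plus store:

    ; extract the active lane's <4 x i32> payload from the shuffle operand
    %sub  = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %wide, i64 0)
    %base = getelementptr i8, ptr %ptr, i64 4          ; Offset = Index (1) * 4 bytes
    call void @llvm.experimental.vp.strided.store.v4i32.p0.i64(
        <4 x i32> %sub, ptr align 4 %base, i64 12,     ; Stride = Factor (3) * 4 bytes
        <4 x i1> %lanemask, i32 4)                     ; VL = 4, i32 per the intrinsic signature

The i32 VL here mirrors the "Note" in the patch: vp.strided.store takes an i32 explicit vector length, whereas the vsseg path below uses an XLen-typed VL.
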
@@ -457,91 +457,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   Builder.CreateCall(VssegNFunc, Operands);
   return true;
 }
-
-/// Lower an interleaved vp.store into a vssegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.store (Factor = 2):
-///
-///   %is = tail call <vscale x 64 x i8>
-///             @llvm.vector.interleave2.nxv64i8(
-///                 <vscale x 32 x i8> %load0,
-///                 <vscale x 32 x i8> %load1
-///   %wide.rvl = shl nuw nsw i32 %rvl, 1
-///   tail call void @llvm.vp.store.nxv64i8.p0(
-///     <vscale x 64 x i8> %is, ptr %ptr,
-///     %mask,
-///     i32 %wide.rvl)
-///
-/// Into:
-///   call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
-///     <vscale x 32 x i8> %load1,
-///     <vscale x 32 x i8> %load2, ptr %ptr,
-///     %mask,
-///     i64 %rvl)
-bool RISCVTargetLowering::lowerInterleavedVPStore(
-    VPIntrinsic *Store, Value *Mask,
-    ArrayRef<Value *> InterleaveOperands) const {
-  assert(Mask && "Expect a valid mask");
-  assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
-         "Unexpected intrinsic");
-
-  const unsigned Factor = InterleaveOperands.size();
-
-  auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType());
-  if (!VTy)
-    return false;
-
-  const DataLayout &DL = Store->getDataLayout();
-  Align Alignment = Store->getParamAlign(1).value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
-  if (!isLegalInterleavedAccessType(
-          VTy, Factor, Alignment,
-          Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
-    return false;
-
-  IRBuilder<> Builder(Store);
-  Value *WideEVL = Store->getArgOperand(3);
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
-    return false;
-
-  auto *PtrTy = Store->getArgOperand(1)->getType();
-  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  Value *EVL =
-      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
-  if (isa<FixedVectorType>(VTy)) {
-    SmallVector<Value *, 8> Operands(InterleaveOperands);
-    Operands.append({Store->getArgOperand(1), Mask, EVL});
-    Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
-                            {VTy, PtrTy, XLenTy}, Operands);
-    return true;
-  }
-
-  unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
-  unsigned NumElts = VTy->getElementCount().getKnownMinValue();
-  Type *VecTupTy = TargetExtType::get(
-      Store->getContext(), "riscv.vector.tuple",
-      ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
-                              NumElts * SEW / 8),
-      Factor);
-
-  Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
-  Value *StoredVal = PoisonValue::get(VecTupTy);
-  for (unsigned i = 0; i < Factor; ++i)
-    StoredVal = Builder.CreateCall(
-        VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
-
-  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), ScalableVssegIntrIds[Factor - 2],
-      {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
-  Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
-                       ConstantInt::get(XLenTy, Log2_64(SEW))};
-
-  Builder.CreateCall(VssegNFunc, Operands);
-  return true;
-}