@@ -234,22 +234,28 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
 ///
 /// Note that the new shufflevectors will be removed and we'll only generate one
 /// vsseg3 instruction in CodeGen.
-bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
+bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
+                                                Value *LaneMask,
                                                 ShuffleVectorInst *SVI,
                                                 unsigned Factor) const {
-  IRBuilder<> Builder(SI);
-  const DataLayout &DL = SI->getDataLayout();
+  IRBuilder<> Builder(Store);
+  const DataLayout &DL = Store->getDataLayout();
   auto Mask = SVI->getShuffleMask();
   auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                    ShuffleVTy->getNumElements() / Factor);
-  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
-                                    SI->getPointerAddressSpace(), DL))
+  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
+
+  Value *Ptr, *VL;
+  Align Alignment;
+  if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
     return false;
 
-  auto *PtrTy = SI->getPointerOperandType();
-  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+  Type *PtrTy = Ptr->getType();
+  unsigned AS = PtrTy->getPointerAddressSpace();
+  if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+    return false;
 
   unsigned Index;
   // If the segment store only has one active lane (i.e. the interleave is
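For context, this entry point matches the shufflevector-plus-store idiom produced for interleaved stores. A minimal LLVM IR sketch of a factor-2 input, with hypothetical values (%a, %b, %p):

  ; Interleave two <4 x i32> fields as a0 b0 a1 b1 ... and store the result.
  %interleaved = shufflevector <4 x i32> %a, <4 x i32> %b,
      <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved, ptr %p, align 4

After this change the same routine appears to serve masked stores as well: getMemOperands (not shown in this hunk) abstracts over the supported store instructions to produce Ptr, VL, and Alignment, while LaneMask carries the per-field mask (an all-ones mask in the plain-store case above).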
@@ -260,26 +266,27 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
     unsigned ScalarSizeInBytes =
         DL.getTypeStoreSize(ShuffleVTy->getElementType());
     Value *Data = SVI->getOperand(0);
-    auto *DataVTy = cast<FixedVectorType>(Data->getType());
+    Data = Builder.CreateExtractVector(VTy, Data, uint64_t(0));
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
     Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
-    Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
-    Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
-    Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+    Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+    // Note: Same VL as above, but i32 not xlen due to signature of
+    // vp.strided.store
+    VL = Builder.CreateElementCount(Builder.getInt32Ty(),
                                    VTy->getElementCount());
 
     CallInst *CI = Builder.CreateIntrinsic(
         Intrinsic::experimental_vp_strided_store,
-        {Data->getType(), BasePtr->getType(), Stride->getType()},
-        {Data, BasePtr, Stride, Mask, VL});
-    CI->addParamAttr(
-        1, Attribute::getWithAlignment(CI->getContext(), SI->getAlign()));
+        {VTy, BasePtr->getType(), Stride->getType()},
+        {Data, BasePtr, Stride, LaneMask, VL});
+    CI->addParamAttr(1,
+                     Attribute::getWithAlignment(CI->getContext(), Alignment));
 
     return true;
   }
 
   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      SI->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+      Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
 
   SmallVector<Value *, 10> Ops;
   SmallVector<int, 16> NewShuffleMask;
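In the single-active-lane branch above, Data is first narrowed from the wide <n*factor x ty> shuffle operand to VTy via an extract of the low elements, then written with a byte stride of Factor * ScalarSizeInBytes starting at an offset of Index * ScalarSizeInBytes. A hedged sketch of the resulting IR for Factor = 3, i32 elements, and Index = 1 (all names and sizes hypothetical):

  ; Base skips the first field (1 * 4 bytes); stride covers one group (3 * 4 bytes).
  ; The EVL argument is an i32 because that is what vp.strided.store expects.
  %base = getelementptr i8, ptr %p, i64 4
  call void @llvm.experimental.vp.strided.store.v4i32.p0.i64(
      <4 x i32> %field, ptr align 4 %base, i64 12,
      <4 x i1> %lanemask, i32 4)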
@@ -295,13 +302,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
 
     NewShuffleMask.clear();
   }
-  // This VL should be OK (should be executable in one vsseg instruction,
-  // potentially under larger LMULs) because we checked that the fixed vector
-  // type fits in isLegalInterleavedAccessType
-  Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
-  Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
-  Ops.append({SI->getPointerOperand(), StoreMask, VL});
-
+  Ops.append({Ptr, LaneMask, VL});
   Builder.CreateCall(VssegNFunc, Ops);
 
   return true;
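By this point Ops already holds the Factor de-interleaved field vectors built by the narrower shufflevectors from the loop above, so only the unified {Ptr, LaneMask, VL} tail is appended before emitting the segment store. Roughly, for Factor = 2 the final call has the following shape; the intrinsic name is illustrative only (whatever FixedVssegIntrIds[Factor - 2] resolves to) and the operand names are hypothetical:

  call void @llvm.riscv.seg2.store.mask.v4i32.p0.i64(
      <4 x i32> %field0, <4 x i32> %field1, ptr %p,
      <4 x i1> %lanemask, i64 4)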
@@ -424,91 +425,3 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   Builder.CreateCall(VssegNFunc, Operands);
   return true;
 }
-
-/// Lower an interleaved vp.store into a vssegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.store (Factor = 2):
-///
-///   %is = tail call <vscale x 64 x i8>
-///             @llvm.vector.interleave2.nxv64i8(
-///                               <vscale x 32 x i8> %load0,
-///                               <vscale x 32 x i8> %load1)
-///   %wide.rvl = shl nuw nsw i32 %rvl, 1
-///   tail call void @llvm.vp.store.nxv64i8.p0(
-///                               <vscale x 64 x i8> %is, ptr %ptr,
-///                               %mask,
-///                               i32 %wide.rvl)
-///
-/// Into:
-///   call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
-///                               <vscale x 32 x i8> %load1,
-///                               <vscale x 32 x i8> %load2, ptr %ptr,
-///                               %mask,
-///                               i64 %rvl)
-bool RISCVTargetLowering::lowerInterleavedVPStore(
-    VPIntrinsic *Store, Value *Mask,
-    ArrayRef<Value *> InterleaveOperands) const {
-  assert(Mask && "Expect a valid mask");
-  assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
-         "Unexpected intrinsic");
-
-  const unsigned Factor = InterleaveOperands.size();
-
-  auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType());
-  if (!VTy)
-    return false;
-
-  const DataLayout &DL = Store->getDataLayout();
-  Align Alignment = Store->getParamAlign(1).value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
-  if (!isLegalInterleavedAccessType(
-          VTy, Factor, Alignment,
-          Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
-    return false;
-
-  IRBuilder<> Builder(Store);
-  Value *WideEVL = Store->getArgOperand(3);
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
-    return false;
-
-  auto *PtrTy = Store->getArgOperand(1)->getType();
-  auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  Value *EVL =
-      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
-  if (isa<FixedVectorType>(VTy)) {
-    SmallVector<Value *, 8> Operands(InterleaveOperands);
-    Operands.append({Store->getArgOperand(1), Mask, EVL});
-    Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
-                            {VTy, PtrTy, XLenTy}, Operands);
-    return true;
-  }
-
-  unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
-  unsigned NumElts = VTy->getElementCount().getKnownMinValue();
-  Type *VecTupTy = TargetExtType::get(
-      Store->getContext(), "riscv.vector.tuple",
-      ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
-                              NumElts * SEW / 8),
-      Factor);
-
-  Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
-  Value *StoredVal = PoisonValue::get(VecTupTy);
-  for (unsigned i = 0; i < Factor; ++i)
-    StoredVal = Builder.CreateCall(
-        VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
-
-  Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      Store->getModule(), ScalableVssegIntrIds[Factor - 2],
-      {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
-  Value *Operands[] = {StoredVal, Store->getArgOperand(1), Mask, EVL,
-                       ConstantInt::get(XLenTy, Log2_64(SEW))};
-
-  Builder.CreateCall(VssegNFunc, Operands);
-  return true;
-}
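For reference, the input shape the deleted function handled, adapted from its doc comment with explicit IR types (%rvl, %ptr, %mask, %load0, %load1 hypothetical):

  %is = tail call <vscale x 64 x i8> @llvm.vector.interleave2.nxv64i8(
            <vscale x 32 x i8> %load0, <vscale x 32 x i8> %load1)
  %wide.rvl = shl nuw nsw i32 %rvl, 1
  tail call void @llvm.vp.store.nxv64i8.p0(
      <vscale x 64 x i8> %is, ptr %ptr,
      <vscale x 64 x i1> %mask, i32 %wide.rvl)

Presumably this pattern now reaches the segment-store lowering through the surviving entry points and getMemOperands, which is what makes the dedicated vp.store path redundant; that routing is an inference from the deletion, not something shown in this hunk.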