@@ -115,21 +115,49 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
 bool RISCVTargetLowering::lowerInterleavedLoad(
-    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
     ArrayRef<unsigned> Indices, unsigned Factor) const {
   assert(Indices.size() == Shuffles.size());
 
-  IRBuilder<> Builder(LI);
-
-  const DataLayout &DL = LI->getDataLayout();
+  IRBuilder<> Builder(Load);
 
+  const DataLayout &DL = Load->getDataLayout();
   auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
-  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
-                                    LI->getPointerAddressSpace(), DL))
-    return false;
+  auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+
+  Value *Ptr, *VL;
+  Align Alignment;
+  if (auto *LI = dyn_cast<LoadInst>(Load)) {
+    assert(LI->isSimple());
+    Ptr = LI->getPointerOperand();
+    Alignment = LI->getAlign();
+    assert(!Mask && "Unexpected mask on a load\n");
+    Mask = Builder.getAllOnesMask(VTy->getElementCount());
+    VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
+  } else {
+    auto *VPLoad = cast<VPIntrinsic>(Load);
+    assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load &&
+           "Unexpected intrinsic");
+    Ptr = VPLoad->getMemoryPointerParam();
+    Alignment = VPLoad->getPointerAlignment().value_or(
+        DL.getABITypeAlign(VTy->getElementType()));
 
-  auto *PtrTy = LI->getPointerOperandType();
-  auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+    assert(Mask && "vp.load needs a mask!");
+
+    Value *WideEVL = VPLoad->getVectorLengthParam();
+    // Conservatively check if EVL is a multiple of factor, otherwise some
+    // (trailing) elements might be lost after the transformation.
+    if (!isMultipleOfN(WideEVL, DL, Factor))
+      return false;
+
+    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+  }
+
+  Type *PtrTy = Ptr->getType();
+  unsigned AS = PtrTy->getPointerAddressSpace();
+  if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+    return false;
 
   // If the segment load is going to be performed segment at a time anyways
   // and there's only one element used, use a strided load instead. This
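For orientation, the vp.load arm added above amounts to the following IR rewrite, shown here as a sketch for Factor = 2 on fixed vectors. The value names, element types, and the exact mangling of the masked segment-load intrinsic are illustrative assumptions rather than output copied from this patch:

    ; before: a masked vp.load feeding de-interleaving shuffles,
    ; with %m being the factor-adjusted mask supplied by the caller
    %wide = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %wm, i32 %wide.evl)
    %v0 = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    %v1 = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

    ; after: only performed when %wide.evl is provably a multiple of the factor
    %evl = udiv exact i32 %wide.evl, 2
    %vl = zext i32 %evl to i64
    %sl = call { <4 x i32>, <4 x i32> } @llvm.riscv.seg2.load.mask.v4i32.i64(
              ptr %p, <4 x i1> %m, i64 %vl)
    %r0 = extractvalue { <4 x i32>, <4 x i32> } %sl, 0
    %r1 = extractvalue { <4 x i32>, <4 x i32> } %sl, 1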
@@ -138,26 +166,23 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
     unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
     Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
     Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
-    Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
-    Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
-    Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
-                                           VTy->getElementCount());
-
+    Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+    // Note: Same VL as above, but i32 not xlen due to signature of
+    // vp.strided.load
+    VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+                                    VTy->getElementCount());
     CallInst *CI =
         Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
                                 {VTy, BasePtr->getType(), Stride->getType()},
                                 {BasePtr, Stride, Mask, VL});
-    CI->addParamAttr(
-        0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign()));
+    CI->addParamAttr(0,
+                     Attribute::getWithAlignment(CI->getContext(), Alignment));
     Shuffles[0]->replaceAllUsesWith(CI);
     return true;
   };
 
-  Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
-  Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
   CallInst *VlsegN = Builder.CreateIntrinsic(
-      FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy},
-      {LI->getPointerOperand(), Mask, VL});
+      FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});
 
   for (unsigned i = 0; i < Shuffles.size(); i++) {
     Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
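As a concrete instance of the single-index fast path in this hunk, take Factor = 4, Indices = {1}, and a <4 x i32> shuffle result; the emitted IR would look roughly like the sketch below (names are illustrative). The comment in the code is the key design note: vp.strided.load takes its explicit vector length as i32, so the VL is rebuilt with getInt32Ty() instead of reusing the XLen-typed VL fed to vlsegN:

    %base = getelementptr i8, ptr %p, i64 4    ; Offset = Indices[0] * 4 bytes
    %v = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(
             ptr align 4 %base, i64 16, <4 x i1> %mask, i32 4)  ; Stride = Factor * 4 bytes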
@@ -426,122 +451,6 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
   return true;
 }
 
-/// Lower an interleaved vp.load into a vlsegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.load (Factor = 2):
-///   %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr,
-///                                                         %mask,
-///                                                         i32 %wide.rvl)
-///   %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> }
-///             @llvm.vector.deinterleave2.nxv64i8(
-///               <vscale x 64 x i8> %l)
-///   %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0
-///   %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1
-///
-/// Into:
-///   %rvl = udiv %wide.rvl, 2
-///   %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> }
-///             @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef,
-///                                                 <vscale x 32 x i8> undef,
-///                                                 ptr %ptr,
-///                                                 %mask,
-///                                                 i64 %rvl,
-///                                                 i64 1)
-///   %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0
-///   %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1
-///
-/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be
-/// removed by the caller
-/// TODO: We probably can loosen the dependency on matching extractvalue when
-/// dealing with factor of 2 (extractvalue is still required for most of other
-/// factors though).
-bool RISCVTargetLowering::lowerInterleavedVPLoad(
-    VPIntrinsic *Load, Value *Mask,
-    ArrayRef<Value *> DeinterleaveResults) const {
-  const unsigned Factor = DeinterleaveResults.size();
-  assert(Mask && "Expect a valid mask");
-  assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
-         "Unexpected intrinsic");
-
-  Value *FirstActive = *llvm::find_if(DeinterleaveResults,
-                                      [](Value *V) { return V != nullptr; });
-  VectorType *VTy = cast<VectorType>(FirstActive->getType());
-
-  auto &DL = Load->getModule()->getDataLayout();
-  Align Alignment = Load->getParamAlign(0).value_or(
-      DL.getABITypeAlign(VTy->getElementType()));
-  if (!isLegalInterleavedAccessType(
-          VTy, Factor, Alignment,
-          Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL))
-    return false;
-
-  IRBuilder<> Builder(Load);
-
-  Value *WideEVL = Load->getVectorLengthParam();
-  // Conservatively check if EVL is a multiple of factor, otherwise some
-  // (trailing) elements might be lost after the transformation.
-  if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
-    return false;
-
-  auto *PtrTy = Load->getArgOperand(0)->getType();
-  auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
-  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
-  Value *EVL =
-      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
-
-  Value *Return = nullptr;
-  if (isa<FixedVectorType>(VTy)) {
-    Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
-                                     {VTy, PtrTy, XLenTy},
-                                     {Load->getArgOperand(0), Mask, EVL});
-  } else {
-    unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
-    unsigned NumElts = VTy->getElementCount().getKnownMinValue();
-    Type *VecTupTy = TargetExtType::get(
-        Load->getContext(), "riscv.vector.tuple",
-        ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
-                                NumElts * SEW / 8),
-        Factor);
-
-    Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
-        Load->getModule(), ScalableVlsegIntrIds[Factor - 2],
-        {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
-    Value *Operands[] = {
-        PoisonValue::get(VecTupTy),
-        Load->getArgOperand(0),
-        Mask,
-        EVL,
-        ConstantInt::get(XLenTy,
-                         RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC),
-        ConstantInt::get(XLenTy, Log2_64(SEW))};
-
-    CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
-
-    SmallVector<Type *, 8> AggrTypes{Factor, VTy};
-    Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
-    Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
-        Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
-    for (unsigned i = 0; i < Factor; ++i) {
-      Value *VecExtract =
-          Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
-      Return = Builder.CreateInsertValue(Return, VecExtract, i);
-    }
-  }
-
-  for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
-    if (!DIO)
-      continue;
-    // We have to create a brand new ExtractValue to replace each
-    // of these old ExtractValue instructions.
-    Value *NewEV =
-        Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
-    DIO->replaceAllUsesWith(NewEV);
-  }
-
-  return true;
-}
-
 /// Lower an interleaved vp.store into a vssegN intrinsic.
 ///
 /// E.g. Lower an interleaved vp.store (Factor = 2):