@@ -2640,17 +2640,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
26402640}
26412641#endif
26422642
2643- /// Use all-true mask for reverse rather than actual mask, as it avoids a
2644- /// dependence w/o affecting the result.
2645- static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
2646-                                      Value *EVL, const Twine &Name) {
2647-   VectorType *ValTy = cast<VectorType>(Operand->getType());
2648-   Value *AllTrueMask =
2649-       Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
2650-   return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
2651-                                  {Operand, AllTrueMask, EVL}, nullptr, Name);
2652- }
2653-
26542643void VPWidenLoadEVLRecipe::execute (VPTransformState &State) {
26552644 auto *LI = cast<LoadInst>(&Ingredient);
26562645
@@ -2665,19 +2654,26 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
26652654 Value *EVL = State.get (getEVL (), VPLane (0 ));
26662655 Value *Addr = State.get (getAddr (), !CreateGather);
26672656 Value *Mask = nullptr ;
2668- if (VPValue *VPMask = getMask ()) {
2657+ if (VPValue *VPMask = getMask ())
26692658 Mask = State.get (VPMask);
2670-     if (isReverse())
2671-       Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2672- } else {
2659+ else
26732660 Mask = Builder.CreateVectorSplat (State.VF , Builder.getTrue ());
2674- }
26752661
26762662 if (CreateGather) {
26772663 NewLI =
26782664 Builder.CreateIntrinsic (DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
26792665 nullptr , " wide.masked.gather" );
26802666 } else {
2667+ if (isReverse ()) {
2668+ auto *EltTy = DataTy->getElementType ();
2669+ // if (EltTy->getScalarSizeInBits() !=
2670+ // EVL->getType()->getScalarSizeInBits())
2671+ // EVL = ConstantInt::getSigned(EVL->getType(),
2672+ // static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
2673+ auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts ());
2674+ Value *Offset = Builder.CreateSub (State.Builder .getInt32 (1 ), EVL);
2675+ Addr = Builder.CreateGEP (EltTy, Addr, Offset, " " , GEP->isInBounds ());
2676+ }
26812677 VectorBuilder VBuilder (Builder);
26822678 VBuilder.setEVL (EVL).setMask (Mask);
26832679 NewLI = cast<CallInst>(VBuilder.createVectorInstruction (
@@ -2686,10 +2682,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
26862682 NewLI->addParamAttr (
26872683 0 , Attribute::getWithAlignment (NewLI->getContext (), Alignment));
26882684 State.addMetadata (NewLI, LI);
2689-   Instruction *Res = NewLI;
2690-   if (isReverse())
2691-     Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
2692-   State.set(this, Res);
2685+ State.set (this , NewLI);
26932686}
26942687
26952688InstructionCost VPWidenLoadEVLRecipe::computeCost (ElementCount VF,
@@ -2707,14 +2700,13 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
27072700 getLoadStoreAlignment (const_cast <Instruction *>(&Ingredient));
27082701 unsigned AS =
27092702 getLoadStoreAddressSpace (const_cast <Instruction *>(&Ingredient));
2710- InstructionCost Cost = Ctx.TTI .getMaskedMemoryOpCost (
2711- Ingredient.getOpcode (), Ty, Alignment, AS, Ctx.CostKind );
2712- if (!Reverse)
2713- return Cost;
2703+ // if (!Reverse)
2704+ return Ctx.TTI .getMaskedMemoryOpCost (Ingredient.getOpcode (), Ty, Alignment,
2705+ AS, Ctx.CostKind );
27142706
2715-   return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
2716-                                        cast<VectorType>(Ty), {}, Ctx.CostKind,
2717-                                        0);
2707+ // return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
2708+ //                                        getAddr()->getUnderlyingValue(), false,
2709+ //                                        Alignment, Ctx.CostKind);
27182710}
27192711
27202712#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2775,7 +2767,8 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
27752767
27762768void VPWidenStoreEVLRecipe::execute (VPTransformState &State) {
27772769 auto *SI = cast<StoreInst>(&Ingredient);
2778-
2770+ Type *ScalarDataTy = getLoadStoreType (&Ingredient);
2771+ auto *DataTy = VectorType::get (ScalarDataTy, State.VF );
27792772 VPValue *StoredValue = getStoredValue ();
27802773 bool CreateScatter = !isConsecutive ();
27812774 const Align Alignment = getLoadStoreAlignment (&Ingredient);
@@ -2786,22 +2779,32 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
27862779 CallInst *NewSI = nullptr ;
27872780 Value *StoredVal = State.get (StoredValue);
27882781 Value *EVL = State.get (getEVL (), VPLane (0 ));
2789-   if (isReverse())
2790-     StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
27912782 Value *Mask = nullptr ;
2792- if (VPValue *VPMask = getMask ()) {
2783+ if (VPValue *VPMask = getMask ())
27932784 Mask = State.get (VPMask);
2794-     if (isReverse())
2795-       Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
2796- } else {
2785+ else
27972786 Mask = Builder.CreateVectorSplat (State.VF , Builder.getTrue ());
2798- }
2787+
27992788 Value *Addr = State.get (getAddr (), !CreateScatter);
28002789 if (CreateScatter) {
28012790 NewSI = Builder.CreateIntrinsic (Type::getVoidTy (EVL->getContext ()),
28022791 Intrinsic::vp_scatter,
28032792 {StoredVal, Addr, Mask, EVL});
28042793 } else {
2794+ if (isReverse ()) {
2795+ auto *EltTy = DataTy->getElementType ();
2796+ // FIXME: we may need not deal with the size, the InstCombine will deal
2797+ // with the Offset Type if (EltTy->getScalarSizeInBits() !=
2798+ // EVL->getType()->getScalarSizeInBits())
2799+ // EVL = ConstantInt::getSigned(EVL->getType(),
2800+ // static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
2801+ auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts ());
2802+ // Value *Offset =
2803+ // Builder.CreateSub(State.Builder.getIntN(EVL->getType()->getScalarSizeInBits(),
2804+ // 1), EVL);
2805+ Value *Offset = Builder.CreateSub (State.Builder .getInt32 (1 ), EVL);
2806+ Addr = Builder.CreateGEP (EltTy, Addr, Offset, " " , GEP->isInBounds ());
2807+ }
28052808 VectorBuilder VBuilder (Builder);
28062809 VBuilder.setEVL (EVL).setMask (Mask);
28072810 NewSI = cast<CallInst>(VBuilder.createVectorInstruction (
@@ -2828,14 +2831,13 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
28282831 getLoadStoreAlignment (const_cast <Instruction *>(&Ingredient));
28292832 unsigned AS =
28302833 getLoadStoreAddressSpace (const_cast <Instruction *>(&Ingredient));
2831- InstructionCost Cost = Ctx.TTI .getMaskedMemoryOpCost (
2832- Ingredient.getOpcode (), Ty, Alignment, AS, Ctx.CostKind );
2833- if (!Reverse)
2834- return Cost;
2834+ // if (!Reverse)
2835+ return Ctx.TTI .getMaskedMemoryOpCost (Ingredient.getOpcode (), Ty, Alignment,
2836+ AS, Ctx.CostKind );
28352837
2836-   return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
2837-                                        cast<VectorType>(Ty), {}, Ctx.CostKind,
2838-                                        0);
2838+ // return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
2839+ //                                        getAddr()->getUnderlyingValue(), false,
2840+ //                                        Alignment, Ctx.CostKind);
28392841}
28402842
28412843#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
0 commit comments