Skip to content

Commit fe13002

Browse files
committed
[Fix] New operand Stride for VPVectorPointerRecipe
1 parent f6a722f commit fe13002

File tree

6 files changed

+39
-36
lines changed

6 files changed

+39
-36
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7774,10 +7774,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
77747774
new VPVectorEndPointerRecipe(Ptr, &Plan.getVF(), getLoadStoreType(I),
77757775
/*Stride*/ -1, Flags, I->getDebugLoc());
77767776
} else {
7777-
VectorPtr = new VPVectorPointerRecipe(
7778-
Ptr, getLoadStoreType(I), /*Strided*/ false,
7779-
GEP ? GEP->getNoWrapFlags() : GEPNoWrapFlags::none(),
7780-
I->getDebugLoc());
7777+
const DataLayout &DL = I->getDataLayout();
7778+
auto *StrideTy = DL.getIndexType(Ptr->getUnderlyingValue()->getType());
7779+
VPValue *StrideOne = Plan.getOrAddLiveIn(ConstantInt::get(StrideTy, 1));
7780+
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), StrideOne,
7781+
GEP ? GEP->getNoWrapFlags()
7782+
: GEPNoWrapFlags::none(),
7783+
I->getDebugLoc());
77817784
}
77827785
Builder.insert(VectorPtr);
77837786
Ptr = VectorPtr;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1767,24 +1767,21 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
17671767
};
17681768

17691769
/// A recipe to compute the pointers for widened memory accesses of IndexedTy.
1770-
/// Supports both consecutive and reverse consecutive accesses.
1771-
/// TODO: Support non-unit strided accesses .
17721770
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
1773-
public VPUnrollPartAccessor<1> {
1771+
public VPUnrollPartAccessor<2> {
17741772
Type *IndexedTy;
17751773

1776-
/// Indicate whether to compute the pointer for strided memory accesses.
1777-
bool Strided;
1778-
17791774
public:
1780-
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool Strided,
1775+
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, VPValue *Stride,
17811776
GEPNoWrapFlags GEPFlags, DebugLoc DL)
1782-
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1783-
GEPFlags, DL),
1784-
IndexedTy(IndexedTy), Strided(Strided) {}
1777+
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC,
1778+
ArrayRef<VPValue *>({Ptr, Stride}), GEPFlags, DL),
1779+
IndexedTy(IndexedTy) {}
17851780

17861781
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
17871782

1783+
VPValue *getStride() const { return getOperand(1); }
1784+
17881785
void execute(VPTransformState &State) override;
17891786

17901787
bool onlyFirstLaneUsed(const VPValue *Op) const override {
@@ -1802,7 +1799,7 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
18021799
}
18031800

18041801
VPVectorPointerRecipe *clone() override {
1805-
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, Strided,
1802+
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, getOperand(1),
18061803
getGEPNoWrapFlags(), getDebugLoc());
18071804
}
18081805

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2384,16 +2384,20 @@ void VPVectorEndPointerRecipe::print(raw_ostream &O, const Twine &Indent,
23842384
void VPVectorPointerRecipe::execute(VPTransformState &State) {
23852385
auto &Builder = State.Builder;
23862386
unsigned CurrentPart = getUnrollPart(*this);
2387-
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2388-
/*IsUnitStride*/ true, CurrentPart, Builder);
2387+
Value *Stride = State.get(getStride(), /*IsScalar*/ true);
2388+
2389+
auto *StrideC = dyn_cast<ConstantInt>(Stride);
2390+
bool IsStrideOne = StrideC && StrideC->isOne();
2391+
bool IsUnitStride = IsStrideOne || (StrideC && StrideC->isMinusOne());
2392+
Type *IndexTy =
2393+
getGEPIndexTy(State.VF.isScalable(),
2394+
/*IsReverse*/ false, IsUnitStride, CurrentPart, Builder);
23892395
Value *Ptr = State.get(getOperand(0), VPLane(0));
23902396

2397+
Stride = Builder.CreateSExtOrTrunc(Stride, IndexTy);
23912398
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2392-
// TODO: Support non-unit-reverse strided accesses.
2393-
Value *Index =
2394-
Strided
2395-
? Builder.CreateMul(Increment, ConstantInt::getSigned(IndexTy, -1))
2396-
: Increment;
2399+
Value *Index = IsStrideOne ? Increment : Builder.CreateMul(Increment, Stride);
2400+
23972401
Value *ResultPtr =
23982402
Builder.CreateGEP(IndexedTy, Ptr, Index, "", getGEPNoWrapFlags());
23992403

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2732,22 +2732,21 @@ void VPlanTransforms::convertToStridedAccesses(VPlan &Plan, VPCostContext &Ctx,
27322732
// The stride of consecutive reverse access must be -1.
27332733
int64_t Stride = -1;
27342734
auto *GEP = dyn_cast<GetElementPtrInst>(PtrUV->stripPointerCasts());
2735+
const DataLayout &DL = Ingredient.getDataLayout();
2736+
auto *StrideTy = DL.getIndexType(PtrUV->getType());
2737+
VPValue *StrideVPV =
2738+
Plan.getOrAddLiveIn(ConstantInt::get(StrideTy, Stride));
27352739
// Create a new vector pointer for strided access.
2736-
auto *NewPtr = new VPVectorPointerRecipe(Ptr, ElementTy, /*Stride=*/true,
2740+
auto *NewPtr = new VPVectorPointerRecipe(Ptr, ElementTy, StrideVPV,
27372741
GEP ? GEP->getNoWrapFlags()
27382742
: GEPNoWrapFlags::none(),
27392743
VecEndPtr->getDebugLoc());
27402744
NewPtr->insertBefore(MemR);
27412745

27422746
auto *LoadR = cast<VPWidenLoadRecipe>(MemR);
2743-
auto *LI = cast<LoadInst>(&Ingredient);
2744-
const DataLayout &DL = LI->getDataLayout();
2745-
auto *StrideTy = DL.getIndexType(LI->getPointerOperand()->getType());
2746-
VPValue *StrideVPV =
2747-
Plan.getOrAddLiveIn(ConstantInt::get(StrideTy, Stride));
27482747
auto *StridedLoad = new VPWidenStridedLoadRecipe(
2749-
*LI, NewPtr, StrideVPV, &Plan.getVF(), LoadR->getMask(), *LoadR,
2750-
LoadR->getDebugLoc());
2748+
*cast<LoadInst>(&Ingredient), NewPtr, StrideVPV, &Plan.getVF(),
2749+
LoadR->getMask(), *LoadR, LoadR->getDebugLoc());
27512750
StridedLoad->insertBefore(LoadR);
27522751
LoadR->replaceAllUsesWith(StridedLoad);
27532752

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
103103
; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[STEPS]]>, ir<-1>
104104
; CHECK-NEXT: CLONE ir<[[ZEXT_IDX:%.+]]> = zext ir<[[IDX]]>
105105
; CHECK-NEXT: CLONE ir<[[LD_IDX:%.+]]> = getelementptr inbounds ir<%B>, ir<[[ZEXT_IDX]]>
106-
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>
106+
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>, ir<-1>
107107
; CHECK-NEXT: WIDEN ir<[[LD:%.+]]> = load vp<[[LD_PTR]]>, stride = ir<-1>, runtimeVF = vp<[[VF]]>
108108
; CHECK-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD]]>, ir<1>
109109
; CHECK-NEXT: CLONE ir<[[ST_IDX:%.+]]> = getelementptr inbounds ir<%A>, ir<[[ZEXT_IDX]]>
@@ -237,7 +237,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
237237
; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[DEV_IV]]>, ir<-1>
238238
; CHECK-NEXT: CLONE ir<[[ZEXT_IDX:%.+]]> = zext ir<[[IDX]]>
239239
; CHECK-NEXT: CLONE ir<[[LD_IDX:%.+]]> = getelementptr inbounds ir<%B>, ir<[[ZEXT_IDX]]>
240-
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>
240+
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>, ir<-1>
241241
; CHECK-NEXT: WIDEN ir<[[LD:%.+]]> = load vp<[[LD_PTR]]>, stride = ir<-1>, runtimeVF = ir<[[VF]]>
242242
; CHECK-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD]]>, ir<1>
243243
; CHECK-NEXT: CLONE ir<[[ST_IDX:%.+]]> = getelementptr inbounds ir<%A>, ir<[[ZEXT_IDX]]>
@@ -510,7 +510,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
510510
; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[STEPS]]>, ir<-1>
511511
; CHECK-NEXT: CLONE ir<[[ZEXT_IDX:%.+]]> = zext ir<[[IDX]]>
512512
; CHECK-NEXT: CLONE ir<[[LD_IDX:%.+]]> = getelementptr inbounds ir<%B>, ir<[[ZEXT_IDX]]>
513-
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>
513+
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>, ir<-1>
514514
; CHECK-NEXT: WIDEN ir<[[LD:%.+]]> = load vp<[[LD_PTR]]>, stride = ir<-1>, runtimeVF = vp<[[VF]]>
515515
; CHECK-NEXT: WIDEN ir<[[ADD:%.+]]> = fadd ir<[[LD]]>, ir<1.000000e+00>
516516
; CHECK-NEXT: CLONE ir<[[ST_IDX:%.+]]> = getelementptr inbounds ir<%A>, ir<[[ZEXT_IDX]]>
@@ -644,7 +644,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
644644
; CHECK-NEXT: CLONE ir<[[IDX:%.+]]> = add nsw vp<[[DEV_IV]]>, ir<-1>
645645
; CHECK-NEXT: CLONE ir<[[ZEXT_IDX:%.+]]> = zext ir<[[IDX]]>
646646
; CHECK-NEXT: CLONE ir<[[LD_IDX:%.+]]> = getelementptr inbounds ir<%B>, ir<[[ZEXT_IDX]]>
647-
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>
647+
; CHECK-NEXT: vp<[[LD_PTR:%.+]]> = vector-pointer ir<[[LD_IDX]]>, ir<-1>
648648
; CHECK-NEXT: WIDEN ir<[[LD:%.+]]> = load vp<[[LD_PTR]]>, stride = ir<-1>, runtimeVF = ir<[[VF]]>
649649
; CHECK-NEXT: WIDEN ir<[[ADD:%.+]]> = fadd ir<[[LD]]>, ir<1.000000e+00>
650650
; CHECK-NEXT: CLONE ir<[[ST_IDX:%.+]]> = getelementptr inbounds ir<%A>, ir<[[ZEXT_IDX]]>

llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
4242
; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION ir\<0\>, vp\<[[CAN_IV_NEXT:%.+]]\>\l" +
4343
; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<1\>, vp\<[[VF]]\>\l" +
4444
; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr inbounds ir\<%y\>, vp\<[[STEPS]]\>\l" +
45-
; CHECK-NEXT: " vp\<[[VEC_PTR:%.+]]\> = vector-pointer ir\<%arrayidx\>\l" +
45+
; CHECK-NEXT: " vp\<[[VEC_PTR:%.+]]\> = vector-pointer ir\<%arrayidx\>, ir\<1\>\l" +
4646
; CHECK-NEXT: " WIDEN ir\<%lv\> = load vp\<[[VEC_PTR]]\>\l" +
4747
; CHECK-NEXT: " WIDEN-INTRINSIC ir\<%call\> = call llvm.sqrt(ir\<%lv\>)\l" +
4848
; CHECK-NEXT: " CLONE ir\<%arrayidx2\> = getelementptr inbounds ir\<%x\>, vp\<[[STEPS]]\>\l" +
49-
; CHECK-NEXT: " vp\<[[VEC_PTR2:%.+]]\> = vector-pointer ir\<%arrayidx2\>\l" +
49+
; CHECK-NEXT: " vp\<[[VEC_PTR2:%.+]]\> = vector-pointer ir\<%arrayidx2\>, ir\<1\>\l" +
5050
; CHECK-NEXT: " WIDEN store vp\<[[VEC_PTR2]]\>, ir\<%call\>\l" +
5151
; CHECK-NEXT: " EMIT vp\<[[CAN_IV_NEXT]]\> = add nuw vp\<[[CAN_IV]]\>, vp\<[[VFxUF]]\>\l" +
5252
; CHECK-NEXT: " EMIT branch-on-count vp\<[[CAN_IV_NEXT]]\>, vp\<[[VEC_TC]]\>\l" +

0 commit comments

Comments
 (0)