
Commit e10ba0a

Step 3: Patch the implementation of load/store recipes

1 parent c69b5a4 · commit e10ba0a
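
This step threads a new `Strided` flag through `VPWidenMemoryRecipe` and its load/store (and EVL) subclasses, teaches `computeCost` to use `TTI::getStridedMemoryOpCost`, and makes the `execute` implementations emit `llvm.experimental.vp.strided.load` / `llvm.experimental.vp.strided.store` when the flag is set. Every construction site touched here still passes `false` for the flag, so generated plans are unchanged until a later step actually marks recipes as strided.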

File tree: 5 files changed, +118 −39 lines changed

- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
- llvm/lib/Transforms/Vectorize/VPlan.h
- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp


llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions

```diff
@@ -8382,12 +8382,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, false,
                                  I->getDebugLoc());
 
   StoreInst *Store = cast<StoreInst>(I);
   return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
-                                Reverse, I->getDebugLoc());
+                                Reverse, false, I->getDebugLoc());
 }
 
 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
```
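
Both recipe kinds hard-code the stride to `-1 * DL.getTypeAllocSize(ScalarDataTy)` in this step (see the `TODO: Support non-unit-reverse strided accesses` comments in the VPlanRecipes.cpp diff below), so the only pattern the new code path can express is a consecutive access traversed in reverse order. A sketch of the kind of source loop that produces such an access, for illustration only; this commit does not change which loops are recognized:

```cpp
// Reverse unit-stride traversal: successive lanes of a vectorized iteration
// read addresses that decrease by sizeof(float), i.e. a strided access with
// a byte stride of -4.
void reverse_add_one(float *a, int n) {
  for (int i = n - 1; i >= 0; --i)
    a[i] += 1.0f;
}
```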

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 19 additions & 10 deletions

```diff
@@ -2570,6 +2570,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
   /// Whether the consecutive accessed addresses are in reverse order.
   bool Reverse;
 
+  /// Whether the accessed addresses are evenly spaced apart by a fixed stride.
+  bool Strided;
+
   /// Whether the memory access is masked.
   bool IsMasked = false;
 
@@ -2583,9 +2586,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
 
   VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
                       std::initializer_list<VPValue *> Operands,
-                      bool Consecutive, bool Reverse, DebugLoc DL)
+                      bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
       : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
-        Reverse(Reverse) {
+        Reverse(Reverse), Strided(Strided) {
     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
   }
 
@@ -2613,6 +2616,10 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
   /// order.
   bool isReverse() const { return Reverse; }
 
+  /// Return whether the accessed addresses are evenly spaced apart by a fixed
+  /// stride.
+  bool isStrided() const { return Strided; }
+
   /// Return the address accessed by this recipe.
   VPValue *getAddr() const { return getOperand(0); }
 
@@ -2642,16 +2649,16 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
 /// optional mask.
 struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
-                    bool Consecutive, bool Reverse, DebugLoc DL)
+                    bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
-                            Reverse, DL),
+                            Reverse, Strided, DL),
         VPValue(this, &Load) {
     setMask(Mask);
   }
 
   VPWidenLoadRecipe *clone() override {
     return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
-                                 getMask(), Consecutive, Reverse,
+                                 getMask(), Consecutive, Reverse, Strided,
                                  getDebugLoc());
   }
 
@@ -2683,7 +2690,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
                             {L.getAddr(), &EVL}, L.isConsecutive(),
-                            L.isReverse(), L.getDebugLoc()),
+                            L.isReverse(), L.isStrided(), L.getDebugLoc()),
         VPValue(this, &getIngredient()) {
     setMask(Mask);
   }
@@ -2720,16 +2727,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
 /// to store to and an optional mask.
 struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
-                     VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
+                     VPValue *Mask, bool Consecutive, bool Reverse,
+                     bool Strided, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
-                            Consecutive, Reverse, DL) {
+                            Consecutive, Reverse, Strided, DL) {
     setMask(Mask);
   }
 
   VPWidenStoreRecipe *clone() override {
     return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                   getStoredValue(), getMask(), Consecutive,
-                                  Reverse, getDebugLoc());
+                                  Reverse, Strided, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -2763,7 +2771,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
                             {S.getAddr(), S.getStoredValue(), &EVL},
-                            S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
+                            S.isConsecutive(), S.isReverse(), S.isStrided(),
+                            S.getDebugLoc()) {
     setMask(Mask);
   }
```
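
After this change each widened memory recipe carries three layout bits: `Consecutive`, `Reverse` (which, per the constructor assert, implies `Consecutive`), and the new `Strided`. A minimal stand-alone model of how the `execute` implementations in the next file dispatch on them; the enum names are illustrative, the real logic is in the diff below:

```cpp
#include <cassert>
#include <cstdio>

// Illustrative model of the lowering choice in VPWidenLoadRecipe::execute and
// VPWidenStoreRecipe::execute after this patch.
enum class Lowering { WideLoadStore, GatherScatter, StridedIntrinsic };

Lowering classify(bool Consecutive, bool Reverse, bool Strided) {
  assert((Consecutive || !Reverse) && "Reverse implies consecutive");
  if (!Consecutive && !Strided)
    return Lowering::GatherScatter;    // CreateGather / CreateScatter
  if (Strided)
    return Lowering::StridedIntrinsic; // experimental_vp_strided_{load,store}
  return Lowering::WideLoadStore;      // (masked) wide load / store
}

int main() {
  // A strided recipe now bypasses the gather/scatter path.
  std::printf("%d\n", static_cast<int>(classify(false, false, true)));
  return 0;
}
```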

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 91 additions & 22 deletions

```diff
@@ -2566,10 +2566,15 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
     assert(!Reverse &&
            "Inconsecutive memory access should not have the order.");
-    return Ctx.TTI.getAddressComputationCost(Ty) +
-           Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
-                                          IsMasked, Alignment, Ctx.CostKind,
-                                          &Ingredient);
+    if (Strided)
+      return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty, Ptr,
+                                            IsMasked, Alignment, Ctx.CostKind,
+                                            &Ingredient);
+    else
+      return Ctx.TTI.getAddressComputationCost(Ty) +
+             Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
+                                            IsMasked, Alignment, Ctx.CostKind,
+                                            &Ingredient);
   }
 
   InstructionCost Cost = 0;
@@ -2596,11 +2601,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGather = !isConsecutive();
+  bool CreateGather = !isConsecutive() && !isStrided();
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
-  Value *Mask = nullptr;
+  Value *Mask = isStrided()
+                    ? Builder.CreateVectorSplat(State.VF, Builder.getTrue())
+                    : nullptr;
   if (auto *VPMask = getMask()) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse
     // of a null all-one mask is a null mask.
@@ -2615,9 +2622,25 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
     NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
                                        "wide.masked.gather");
   } else if (Mask) {
-    NewLI =
-        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
-                                 PoisonValue::get(DataTy), "wide.masked.load");
+    if (isStrided()) {
+      const DataLayout &DL = LI->getDataLayout();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(ScalarDataTy));
+      Value *RuntimeVF =
+          getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
+      NewLI = Builder.CreateIntrinsic(
+          Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
+          {Addr, StrideVal, Mask, RuntimeVF}, nullptr, "wide.strided.load");
+      cast<CallInst>(NewLI)->addParamAttr(
+          0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
+    } else {
+      NewLI = Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
+                                       PoisonValue::get(DataTy),
+                                       "wide.masked.load");
+    }
   } else {
     NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
   }
@@ -2655,7 +2678,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGather = !isConsecutive();
+  bool CreateGather = !isConsecutive() && !isStrided();
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
@@ -2675,6 +2698,16 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
+  } else if (isStrided()) {
+    const DataLayout &DL = LI->getDataLayout();
+    auto *PtrTy = Addr->getType();
+    auto *StrideTy = DL.getIndexType(PtrTy);
+    // TODO: Support non-unit-reverse strided accesses.
+    auto *StrideVal =
+        ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(ScalarDataTy));
+    NewLI = Builder.CreateIntrinsic(
+        Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
+        {Addr, StrideVal, Mask, EVL}, nullptr, "wide.strided.load");
   } else {
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
@@ -2729,13 +2762,15 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
 
   VPValue *StoredVPValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
+  bool CreateScatter = !isConsecutive() && !isStrided();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
 
-  Value *Mask = nullptr;
+  Value *Mask = isStrided()
+                    ? Builder.CreateVectorSplat(State.VF, Builder.getTrue())
+                    : nullptr;
   if (auto *VPMask = getMask()) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse
     // of a null all-one mask is a null mask.
@@ -2754,12 +2789,32 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   }
   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
   Instruction *NewSI = nullptr;
-  if (CreateScatter)
+  if (CreateScatter) {
     NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
-  else if (Mask)
-    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
-  else
+  } else if (Mask) {
+    if (isStrided()) {
+      const DataLayout &DL = SI->getDataLayout();
+      auto *StoredVecTy = cast<VectorType>(StoredVal->getType());
+      Type *StoredEltTy = StoredVecTy->getElementType();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(StoredEltTy));
+      Value *RuntimeVF =
+          getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
+      NewSI = Builder.CreateIntrinsic(
+          Intrinsic::experimental_vp_strided_store,
+          {StoredVecTy, PtrTy, StrideTy},
+          {StoredVal, Addr, StrideVal, Mask, RuntimeVF});
+      cast<CallInst>(NewSI)->addParamAttr(
+          1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
+    } else {
+      NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+    }
+  } else {
     NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
+  }
   State.addMetadata(NewSI, SI);
 }
 
@@ -2775,7 +2830,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
 
   VPValue *StoredValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
+  bool CreateScatter = !isConsecutive() && !isStrided();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
 
   auto &Builder = State.Builder;
@@ -2800,11 +2855,25 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
                               Intrinsic::vp_scatter,
                               {StoredVal, Addr, Mask, EVL});
   } else {
-    VectorBuilder VBuilder(Builder);
-    VBuilder.setEVL(EVL).setMask(Mask);
-    NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
-        Instruction::Store, Type::getVoidTy(EVL->getContext()),
-        {StoredVal, Addr}));
+    if (isStrided()) {
+      const DataLayout &DL = SI->getDataLayout();
+      auto *StoredVecTy = cast<VectorType>(StoredVal->getType());
+      Type *StoredEltTy = StoredVecTy->getElementType();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(StoredEltTy));
+      NewSI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                      {StoredVecTy, PtrTy, StrideTy},
+                                      {StoredVal, Addr, StrideVal, Mask, EVL});
+    } else {
+      VectorBuilder VBuilder(Builder);
+      VBuilder.setEVL(EVL).setMask(Mask);
+      NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
+          Instruction::Store, Type::getVoidTy(EVL->getContext()),
+          {StoredVal, Addr}));
+    }
   }
   NewSI->addParamAttr(
       1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
```
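
All four new code paths emit the VP strided intrinsics with a constant byte stride of `-1 * sizeof(element)`; the non-EVL recipes additionally synthesize an all-true mask (replaced by the recipe's mask when one is present) and pass the runtime VF as the explicit vector length. A self-contained scalar model of what `llvm.experimental.vp.strided.load` computes under those operands (a sketch based on my reading of the intrinsic's semantics, not LLVM code):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Scalar model of llvm.experimental.vp.strided.load for float elements:
// lane I (with I < EVL and Mask[I] set) loads sizeof(float) bytes from
// Base + I * StrideBytes; other lanes are unspecified (zeroed here).
std::vector<float> vpStridedLoad(const uint8_t *Base, int64_t StrideBytes,
                                 const std::vector<bool> &Mask, unsigned EVL) {
  std::vector<float> Out(Mask.size(), 0.0f);
  for (unsigned I = 0; I < EVL && I < Mask.size(); ++I) {
    if (!Mask[I])
      continue;
    float V;
    std::memcpy(&V, Base + static_cast<int64_t>(I) * StrideBytes, sizeof(V));
    Out[I] = V;
  }
  return Out;
}

int main() {
  float A[4] = {0.0f, 1.0f, 2.0f, 3.0f};
  // With the patch's stride of -1 * sizeof(element), starting at &A[3] the
  // lanes read A[3], A[2], A[1], A[0]: a consecutive access in reverse.
  std::vector<float> R =
      vpStridedLoad(reinterpret_cast<const uint8_t *>(&A[3]),
                    -static_cast<int64_t>(sizeof(float)),
                    {true, true, true, true}, 4);
  for (float V : R)
    std::printf("%g ", V); // prints: 3 2 1 0
  std::printf("\n");
  return 0;
}
```

The store path is symmetric: `experimental_vp_strided_store` writes lane I to `Addr + I * StrideBytes`, and the diff attaches the alignment as a parameter attribute on the pointer operand, mirroring what the masked load/store builders would have carried.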

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 2 deletions

```diff
@@ -73,13 +73,13 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
       if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
         NewRecipe = new VPWidenLoadRecipe(
             *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
-            false /*Consecutive*/, false /*Reverse*/,
+            false /*Consecutive*/, false /*Reverse*/, false /*Strided*/,
             Ingredient.getDebugLoc());
       } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
         NewRecipe = new VPWidenStoreRecipe(
             *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
             nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
-            Ingredient.getDebugLoc());
+            false /*Strided*/, Ingredient.getDebugLoc());
       } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
         NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
       } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
```

llvm/unittests/Transforms/Vectorize/VPlanTest.cpp

Lines changed: 4 additions & 3 deletions

```diff
@@ -1084,7 +1084,7 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
       new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {});
+  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {});
   EXPECT_TRUE(isa<VPUser>(&Recipe));
   VPRecipeBase *BaseR = &Recipe;
   EXPECT_TRUE(isa<VPUser>(BaseR));
@@ -1195,7 +1195,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
       new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {});
+  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {});
   EXPECT_FALSE(Recipe.mayHaveSideEffects());
   EXPECT_TRUE(Recipe.mayReadFromMemory());
   EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1209,7 +1209,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
   VPValue *StoredV = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
-  VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, {});
+  VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, false,
+                            {});
   EXPECT_TRUE(Recipe.mayHaveSideEffects());
   EXPECT_FALSE(Recipe.mayReadFromMemory());
   EXPECT_TRUE(Recipe.mayWriteToMemory());
```
