-
Couldn't load subscription status.
- Fork 15k
[VPlan] Rewrite sinkScalarOperands (NFC) #151696
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

Rewrite sinkScalarOperands in VPlanTransforms for clarity, with minimal test changes.

Patch is 21.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151696.diff

4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1d12a3a01e5e..04521573298d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -122,53 +122,64 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
}
static bool sinkScalarOperands(VPlan &Plan) {
- auto Iter = vp_depth_first_deep(Plan.getEntry());
+ bool ScalarVFOnly = Plan.hasScalarVFOnly();
bool Changed = false;
// First, collect the operands of all recipes in replicate blocks as seeds for
// sinking.
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
- for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
+ for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
continue;
- VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
+ VPBasicBlock *VPBB =
+ dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
continue;
for (auto &Recipe : *VPBB) {
- for (VPValue *Op : Recipe.operands())
- if (auto *Def =
- dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert(std::make_pair(VPBB, Def));
+ for (VPValue *Op : Recipe.operands()) {
+ auto *Def =
+ dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe());
+ if (!Def)
+ continue;
+
+ // We only know how to duplicate VPReplicateRecipes and
+ // VPScalarIVStepsRecipes for now.
+ if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Def))
+ continue;
+
+ // TODO: Relax checks in the future, e.g. we could also sink reads if
+ // their memory location is not modified in the vector loop.
+ if (Def->getParent() == VPBB || Def->mayHaveSideEffects() ||
+ Def->mayReadOrWriteMemory())
+ continue;
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(Op))
+ if (!ScalarVFOnly && RepR->isSingleScalar())
+ continue;
+
+ WorkList.insert(std::make_pair(VPBB, Def));
+ }
}
}
- bool ScalarVFOnly = Plan.hasScalarVFOnly();
// Try to sink each replicate or scalar IV steps recipe in the worklist.
- for (unsigned I = 0; I != WorkList.size(); ++I) {
+ for (const auto &Item : WorkList) {
VPBasicBlock *SinkTo;
VPSingleDefRecipe *SinkCandidate;
- std::tie(SinkTo, SinkCandidate) = WorkList[I];
- if (SinkCandidate->getParent() == SinkTo ||
- SinkCandidate->mayHaveSideEffects() ||
- SinkCandidate->mayReadOrWriteMemory())
- continue;
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
- if (!ScalarVFOnly && RepR->isSingleScalar())
- continue;
- } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
- continue;
+ std::tie(SinkTo, SinkCandidate) = Item;
- bool NeedsDuplicating = false;
// All recipe users of the sink candidate must be in the same block SinkTo
// or all users outside of SinkTo must be uniform-after-vectorization (
// i.e., only first lane is used) . In the latter case, we need to duplicate
// SinkCandidate.
- auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
- SinkCandidate](VPUser *U) {
+ bool NeedsDuplicating = false;
+ auto CanSinkWithUser = [SinkTo, SinkCandidate,
+ &NeedsDuplicating](VPUser *U) {
auto *UI = cast<VPRecipeBase>(U);
if (UI->getParent() == SinkTo)
return true;
- NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
+ NeedsDuplicating |= UI->onlyFirstLaneUsed(SinkCandidate);
// We only know how to duplicate VPReplicateRecipes and
// VPScalarIVStepsRecipes for now.
return NeedsDuplicating &&
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 9deab9063d710..f9648c50e3876 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -133,8 +133,8 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
@@ -292,8 +292,8 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK: pred.store.if:
-; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
+; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2>
; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 70e730f0284c0..19e87c12dbee3 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -44,10 +44,10 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -55,10 +55,10 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]]
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]]
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -120,10 +120,10 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -131,10 +131,10 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]]
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -266,10 +266,10 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -277,10 +277,10 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]]
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index a85718d1a382f..9d06f425782a8 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -507,9 +507,6 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 5, i64 -1, i64 -1, i64 -1>, [[VECTOR_PH]] ], [ [[PREDPHI15:%.*]], [[PRED_LOAD_CONTINUE14]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[COND]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
@@ -525,7 +522,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP47]], i64 8
; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP12]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
@@ -534,7 +532,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP48]], i64 16
; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP17]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
@@ -543,7 +542,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if5:
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP54]], i64 24
; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP22]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
@@ -563,7 +563,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP26]], i64 1
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP55]], i64 8
; CHECK-NEXT: [[TMP34:%.*]] = load i64, ptr [[TMP33]], align 4
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i64> [[TMP31]], i64 [[TMP34]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]]
@@ -572,7 +573,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP26]], i64 2
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if11:
-; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP56]], i64 16
; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP38]], align 4
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i64> [[TMP36]], i64 [[TMP39]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
@@ -581,7 +583,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP26]], i64 3
; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.if13:
-; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP57]], i64 24
; CHECK-NEXT: [[TMP44:%.*]] = load i64, ptr [[TMP43]], align 4
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i64> [[TMP41]], i64 [[TMP44]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
@@ -806,9 +809,6 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 2.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[PREDPHI15:%.*]], [[PRED_LOAD_CONTINUE14]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
@@ -824,7 +824,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP53:%.*]] = getelementptr float, ptr [[SRC1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP53]], i64 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP12]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
@@ -833,7 +834,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr float, ptr [[SRC1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP54]], i64 8
; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP17]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
@@ -842,7 +844,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if5:
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP55:%.*]] = getelementptr float, ptr [[SRC1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP55]], i64 12
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP22]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
@@ -863,7 +866,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP26]], i64 1
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds...
[truncated]
|
|
@llvm/pr-subscribers-vectorizers

Author: Ramkumar Ramachandra (artagnon)

Changes

Rewrite sinkScalarOperands in VPlanTransforms for clarity, with minimal test changes.

Patch is 21.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151696.diff

4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1d12a3a01e5e..04521573298d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -122,53 +122,64 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
}
static bool sinkScalarOperands(VPlan &Plan) {
- auto Iter = vp_depth_first_deep(Plan.getEntry());
+ bool ScalarVFOnly = Plan.hasScalarVFOnly();
bool Changed = false;
// First, collect the operands of all recipes in replicate blocks as seeds for
// sinking.
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
- for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
+ for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
continue;
- VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
+ VPBasicBlock *VPBB =
+ dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
continue;
for (auto &Recipe : *VPBB) {
- for (VPValue *Op : Recipe.operands())
- if (auto *Def =
- dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert(std::make_pair(VPBB, Def));
+ for (VPValue *Op : Recipe.operands()) {
+ auto *Def =
+ dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe());
+ if (!Def)
+ continue;
+
+ // We only know how to duplicate VPReplicateRecipes and
+ // VPScalarIVStepsRecipes for now.
+ if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Def))
+ continue;
+
+ // TODO: Relax checks in the future, e.g. we could also sink reads if
+ // their memory location is not modified in the vector loop.
+ if (Def->getParent() == VPBB || Def->mayHaveSideEffects() ||
+ Def->mayReadOrWriteMemory())
+ continue;
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(Op))
+ if (!ScalarVFOnly && RepR->isSingleScalar())
+ continue;
+
+ WorkList.insert(std::make_pair(VPBB, Def));
+ }
}
}
- bool ScalarVFOnly = Plan.hasScalarVFOnly();
// Try to sink each replicate or scalar IV steps recipe in the worklist.
- for (unsigned I = 0; I != WorkList.size(); ++I) {
+ for (const auto &Item : WorkList) {
VPBasicBlock *SinkTo;
VPSingleDefRecipe *SinkCandidate;
- std::tie(SinkTo, SinkCandidate) = WorkList[I];
- if (SinkCandidate->getParent() == SinkTo ||
- SinkCandidate->mayHaveSideEffects() ||
- SinkCandidate->mayReadOrWriteMemory())
- continue;
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
- if (!ScalarVFOnly && RepR->isSingleScalar())
- continue;
- } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
- continue;
+ std::tie(SinkTo, SinkCandidate) = Item;
- bool NeedsDuplicating = false;
// All recipe users of the sink candidate must be in the same block SinkTo
// or all users outside of SinkTo must be uniform-after-vectorization (
// i.e., only first lane is used) . In the latter case, we need to duplicate
// SinkCandidate.
- auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
- SinkCandidate](VPUser *U) {
+ bool NeedsDuplicating = false;
+ auto CanSinkWithUser = [SinkTo, SinkCandidate,
+ &NeedsDuplicating](VPUser *U) {
auto *UI = cast<VPRecipeBase>(U);
if (UI->getParent() == SinkTo)
return true;
- NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
+ NeedsDuplicating |= UI->onlyFirstLaneUsed(SinkCandidate);
// We only know how to duplicate VPReplicateRecipes and
// VPScalarIVStepsRecipes for now.
return NeedsDuplicating &&
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 9deab9063d710..f9648c50e3876 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -133,8 +133,8 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
@@ -292,8 +292,8 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK: pred.store.if:
-; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
+; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2>
; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 70e730f0284c0..19e87c12dbee3 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -44,10 +44,10 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -55,10 +55,10 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]]
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]]
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -120,10 +120,10 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -131,10 +131,10 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]]
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -266,10 +266,10 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -277,10 +277,10 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]]
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index a85718d1a382f..9d06f425782a8 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -507,9 +507,6 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 5, i64 -1, i64 -1, i64 -1>, [[VECTOR_PH]] ], [ [[PREDPHI15:%.*]], [[PRED_LOAD_CONTINUE14]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[COND]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
@@ -525,7 +522,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP47]], i64 8
; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP12]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
@@ -534,7 +532,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP48]], i64 16
; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP17]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
@@ -543,7 +542,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if5:
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP54]], i64 24
; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP22]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
@@ -563,7 +563,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP26]], i64 1
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP55]], i64 8
; CHECK-NEXT: [[TMP34:%.*]] = load i64, ptr [[TMP33]], align 4
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i64> [[TMP31]], i64 [[TMP34]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]]
@@ -572,7 +573,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP26]], i64 2
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if11:
-; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP56]], i64 16
; CHECK-NEXT: [[TMP39:%.*]] = load i64, ptr [[TMP38]], align 4
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i64> [[TMP36]], i64 [[TMP39]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
@@ -581,7 +583,8 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP26]], i64 3
; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.if13:
-; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP57]], i64 24
; CHECK-NEXT: [[TMP44:%.*]] = load i64, ptr [[TMP43]], align 4
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x i64> [[TMP41]], i64 [[TMP44]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
@@ -806,9 +809,6 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 2.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[PREDPHI15:%.*]], [[PRED_LOAD_CONTINUE14]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
@@ -824,7 +824,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP53:%.*]] = getelementptr float, ptr [[SRC1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP53]], i64 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP12]], i64 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
@@ -833,7 +834,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr float, ptr [[SRC1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP54]], i64 8
; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP17]], i64 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
@@ -842,7 +844,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if5:
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[SRC1]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP55:%.*]] = getelementptr float, ptr [[SRC1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP55]], i64 12
; CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP22]], i64 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
@@ -863,7 +866,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP26]], i64 1
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds...
[truncated]
|
f3b23d8 to
0395041
Compare
|
Gentle ping. |
0395041 to
402ba0f
Compare
|
Gentle ping. |
1 similar comment
|
Gentle ping. |
|
Gentle ping. I have some improvements planned to handle more recipes, but I think it would be good to have this reviewed first. |
|
Can you please add more description to the title and the commit message why you're making the change and what the motivation is? For example, is this refactoring in preparation for follow-on work? |
Done, thanks. |
402ba0f to
66d6e65
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like there are some crashes with this patch:
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx15.0.0"
; Function Attrs: optsize
; Single-block loop over i16 elements of %0: each iteration loads element
; [iv], multiplies it by the loop-invariant %1, and stores the product back
; to the same address. Attribute group #0 marks the function optsize.
define void @test(ptr %0, i64 %wide.trip.count.i, i16 %1) #0 {
entry:
br label %for.body.i
for.body.i: ; preds = %for.body.i, %entry
; Induction variable: starts at 0 and advances by 1 per iteration.
%indvars.iv.i37 = phi i64 [ 0, %entry ], [ %indvars.iv.next.i39, %for.body.i ]
%arrayidx.i38 = getelementptr i16, ptr %0, i64 %indvars.iv.i37
%2 = load i16, ptr %arrayidx.i38, align 2
%conv5.i = mul i16 %2, %1
; The store reuses the address that was just loaded from.
store i16 %conv5.i, ptr %arrayidx.i38, align 2
%indvars.iv.next.i39 = add i64 %indvars.iv.i37, 1
; The exit test compares the pre-increment IV against the bound, so the
; iteration with iv == %wide.trip.count.i still executes before leaving.
%exitcond.not.i40 = icmp eq i64 %indvars.iv.i37, %wide.trip.count.i
br i1 %exitcond.not.i40, label %exit, label %for.body.i
exit: ; preds = %for.body.i
ret void
}
attributes #0 = { optsize }
Thanks, there was a thinko:
@@ -170,10 +170,10 @@ static bool sinkScalarOperands(VPlan &Plan) {
}
// Try to sink each replicate or scalar IV steps recipe in the worklist.
- for (const auto &Item : WorkList) {
+ for (unsigned I = 0; I != WorkList.size(); ++I) {
VPBasicBlock *SinkTo;
VPSingleDefRecipe *SinkCandidate;
- std::tie(SinkTo, SinkCandidate) = Item;
+ std::tie(SinkTo, SinkCandidate) = WorkList[I];
... but now there are more crashes. Investigating. |
66d6e65 to
5321e93
Compare
|
It turns out that there was a huge thinko, and that the patch is actually non-functional. |
|
Gentle ping. |
1 similar comment
|
Gentle ping. |
a6ce15a to
038664c
Compare
Rewrite sinkScalarOperands in VPlanTransforms for clarity, with minimal test changes.
038664c to
88c6f00
Compare
Rewrite sinkScalarOperands in VPlanTransforms for clarity, in preparation for follow-up work to extend it to handle more recipes.