Skip to content

Commit 7c34848

Browse files
authored
[VPlan] Hoist loads with invariant addresses using noalias metadata. (#166247)
This patch implements a transform that hoists single-scalar replicated loads with invariant addresses out of the vector loop to the preheader when scoped noalias metadata proves they cannot alias with any stores in the loop. This enables hoisting of loads that we can prove do not alias any stores in the loop thanks to the runtime memory checks added during vectorization. PR: #166247
1 parent 603ac57 commit 7c34848

22 files changed

+265
-110
lines changed

llvm/include/llvm/Analysis/ScopedNoAliasAA.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ class ScopedNoAliasAAResult : public AAResultBase {
4646
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
4747
const CallBase *Call2, AAQueryInfo &AAQI);
4848

49-
LLVM_ABI void
49+
LLVM_ABI static void
5050
collectScopedDomains(const MDNode *NoAlias,
51-
SmallPtrSetImpl<const MDNode *> &Domains) const;
51+
SmallPtrSetImpl<const MDNode *> &Domains);
5252

53-
private:
54-
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
53+
LLVM_ABI static bool mayAliasInScopes(const MDNode *Scopes,
54+
const MDNode *NoAlias);
5555
};
5656

5757
/// Analysis pass providing a never-invalidated alias analysis result.

llvm/lib/Analysis/ScopedNoAliasAA.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
116116

117117
/// Collect the set of scoped domains relevant to the noalias scopes.
118118
void ScopedNoAliasAAResult::collectScopedDomains(
119-
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
119+
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) {
120120
if (!NoAlias)
121121
return;
122122
assert(Domains.empty() && "Domains should be empty");
@@ -127,7 +127,7 @@ void ScopedNoAliasAAResult::collectScopedDomains(
127127
}
128128

129129
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
130-
const MDNode *NoAlias) const {
130+
const MDNode *NoAlias) {
131131
if (!Scopes || !NoAlias)
132132
return true;
133133

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/ADT/ilist.h"
3333
#include "llvm/ADT/ilist_node.h"
3434
#include "llvm/Analysis/IVDescriptors.h"
35+
#include "llvm/Analysis/MemoryLocation.h"
3536
#include "llvm/Analysis/VectorUtils.h"
3637
#include "llvm/IR/DebugLoc.h"
3738
#include "llvm/IR/FMF.h"
@@ -981,6 +982,13 @@ class VPIRMetadata {
981982
/// Intersect this VPIRMetadata object with \p MD, keeping only metadata
982983
/// nodes that are common to both.
983984
void intersect(const VPIRMetadata &MD);
985+
986+
/// Get metadata of kind \p Kind. Returns nullptr if not found.
987+
MDNode *getMetadata(unsigned Kind) const {
988+
auto It =
989+
find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
990+
return It != Metadata.end() ? It->second : nullptr;
991+
}
984992
};
985993

986994
/// This is a concrete Recipe that models a single VPlan-level instruction.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,20 @@
2424
#include "llvm/ADT/APInt.h"
2525
#include "llvm/ADT/PostOrderIterator.h"
2626
#include "llvm/ADT/STLExtras.h"
27+
#include "llvm/ADT/SetOperations.h"
2728
#include "llvm/ADT/SetVector.h"
29+
#include "llvm/ADT/SmallPtrSet.h"
2830
#include "llvm/ADT/TypeSwitch.h"
2931
#include "llvm/Analysis/IVDescriptors.h"
3032
#include "llvm/Analysis/InstSimplifyFolder.h"
3133
#include "llvm/Analysis/LoopInfo.h"
34+
#include "llvm/Analysis/MemoryLocation.h"
3235
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
36+
#include "llvm/Analysis/ScopedNoAliasAA.h"
3337
#include "llvm/Analysis/VectorUtils.h"
3438
#include "llvm/IR/Intrinsics.h"
3539
#include "llvm/IR/MDBuilder.h"
40+
#include "llvm/IR/Metadata.h"
3641
#include "llvm/Support/Casting.h"
3742
#include "llvm/Support/TypeSize.h"
3843
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -2401,6 +2406,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
24012406
runPass(removeDeadRecipes, Plan);
24022407

24032408
runPass(createAndOptimizeReplicateRegions, Plan);
2409+
runPass(hoistInvariantLoads, Plan);
24042410
runPass(mergeBlocksIntoPredecessors, Plan);
24052411
runPass(licm, Plan);
24062412
}
@@ -3914,6 +3920,54 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
39143920
}
39153921
}
39163922

3923+
void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
3924+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
3925+
3926+
// Collect candidate loads with invariant addresses and noalias scopes
3927+
// metadata and memory-writing recipes with noalias metadata.
3928+
SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
3929+
SmallVector<MemoryLocation> Stores;
3930+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
3931+
vp_depth_first_shallow(LoopRegion->getEntry()))) {
3932+
for (VPRecipeBase &R : *VPBB) {
3933+
// Only handle single-scalar replicated loads with invariant addresses.
3934+
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
3935+
if (RepR->isPredicated() || !RepR->isSingleScalar() ||
3936+
RepR->getOpcode() != Instruction::Load)
3937+
continue;
3938+
3939+
VPValue *Addr = RepR->getOperand(0);
3940+
if (Addr->isDefinedOutsideLoopRegions()) {
3941+
MemoryLocation Loc = *vputils::getMemoryLocation(*RepR);
3942+
if (!Loc.AATags.Scope)
3943+
continue;
3944+
CandidateLoads.push_back({RepR, Loc});
3945+
}
3946+
}
3947+
if (R.mayWriteToMemory()) {
3948+
auto Loc = vputils::getMemoryLocation(R);
3949+
if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
3950+
return;
3951+
Stores.push_back(*Loc);
3952+
}
3953+
}
3954+
}
3955+
3956+
VPBasicBlock *Preheader = Plan.getVectorPreheader();
3957+
for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
3958+
// Hoist the load to the preheader if it doesn't alias with any stores
3959+
// according to the noalias metadata. Other loads should have been hoisted
3960+
// by other passes
3961+
const AAMDNodes &LoadAA = LoadLoc.AATags;
3962+
if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
3963+
return !ScopedNoAliasAAResult::mayAliasInScopes(
3964+
LoadAA.Scope, StoreLoc.AATags.NoAlias);
3965+
})) {
3966+
LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
3967+
}
3968+
}
3969+
}
3970+
39173971
void VPlanTransforms::materializeConstantVectorTripCount(
39183972
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
39193973
PredicatedScalarEvolution &PSE) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,11 @@ struct VPlanTransforms {
309309
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
310310
static void materializeBroadcasts(VPlan &Plan);
311311

312+
/// Hoist single-scalar loads with invariant addresses out of the vector loop
313+
/// to the preheader, if they are proven not to alias with any stores in the
314+
/// plan using noalias metadata.
315+
static void hoistInvariantLoads(VPlan &Plan);
316+
312317
// Materialize vector trip counts for constants early if it can simply be
313318
// computed as (Original TC / VF * UF) * VF * UF.
314319
static void

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "VPlanDominatorTree.h"
1212
#include "VPlanPatternMatch.h"
1313
#include "llvm/ADT/TypeSwitch.h"
14+
#include "llvm/Analysis/MemoryLocation.h"
1415
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
1516

1617
using namespace llvm;
@@ -393,3 +394,20 @@ bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
393394
return VPB->getNumSuccessors() == 2 &&
394395
VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
395396
}
397+
398+
std::optional<MemoryLocation>
399+
vputils::getMemoryLocation(const VPRecipeBase &R) {
400+
return TypeSwitch<const VPRecipeBase *, std::optional<MemoryLocation>>(&R)
401+
.Case<VPWidenMemoryRecipe, VPInterleaveBase, VPReplicateRecipe>(
402+
[](auto *S) {
403+
MemoryLocation Loc;
404+
// Populate noalias metadata from VPIRMetadata.
405+
if (MDNode *NoAliasMD = S->getMetadata(LLVMContext::MD_noalias))
406+
Loc.AATags.NoAlias = NoAliasMD;
407+
if (MDNode *AliasScopeMD =
408+
S->getMetadata(LLVMContext::MD_alias_scope))
409+
Loc.AATags.Scope = AliasScopeMD;
410+
return Loc;
411+
})
412+
.Default([](auto *) { return std::nullopt; });
413+
}

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/Support/Compiler.h"
1414

1515
namespace llvm {
16+
class MemoryLocation;
1617
class ScalarEvolution;
1718
class SCEV;
1819
} // namespace llvm
@@ -74,6 +75,11 @@ getRecipesForUncountableExit(VPlan &Plan,
7475
SmallVectorImpl<VPRecipeBase *> &Recipes,
7576
SmallVectorImpl<VPRecipeBase *> &GEPs);
7677

78+
/// Return a MemoryLocation for \p R with noalias metadata populated from
79+
/// \p R, if the recipe is supported and std::nullopt otherwise. The pointer of
80+
/// the location is conservatively set to nullptr.
81+
std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R);
82+
7783
/// Extracts and returns NoWrap and FastMath flags from the induction binop in
7884
/// \p ID.
7985
inline VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID) {

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
386386
; DEFAULT-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
387387
; DEFAULT-NEXT: [[ENTRY:.*:]]
388388
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
389-
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 60
389+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 28
390390
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
391391
; DEFAULT: [[VECTOR_MEMCHECK]]:
392392
; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
@@ -427,16 +427,16 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
427427
; DEFAULT: [[VECTOR_PH]]:
428428
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
429429
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
430-
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
431-
; DEFAULT: [[VECTOR_BODY]]:
432-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
433-
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
430+
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META8:![0-9]+]]
434431
; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
432+
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META13:![0-9]+]]
435433
; DEFAULT-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
436-
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
437434
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP5]]
438435
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
439436
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
437+
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
438+
; DEFAULT: [[VECTOR_BODY]]:
439+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
440440
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
441441
; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
442442
; DEFAULT: [[PRED_STORE_IF]]:

llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
132132
; DEFAULT: vector.ph:
133133
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
134134
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
135-
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
136-
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
137-
; DEFAULT: vector.body:
138-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
139135
; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
140136
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
141137
; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer
142138
; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8>
139+
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
143140
; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]]
141+
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
142+
; DEFAULT: vector.body:
143+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
144144
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
145145
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
146146
; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
@@ -156,15 +156,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
156156
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
157157
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
158158
; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
159-
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
160-
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
161-
; DEFAULT: vec.epilog.vector.body:
162-
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
163159
; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
164160
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0
165161
; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
166162
; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
163+
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
167164
; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]]
165+
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
166+
; DEFAULT: vec.epilog.vector.body:
167+
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
168168
; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
169169
; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]]
170170
; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8

llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) {
1717
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1818
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1919
; CHECK: [[VECTOR_PH]]:
20+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
21+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
22+
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2023
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
2124
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
2225
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2326
; CHECK: [[VECTOR_BODY]]:
2427
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
2528
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
26-
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
27-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP6]], i64 0
28-
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2929
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT3]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
3030
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
3131
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]

0 commit comments

Comments
 (0)