Skip to content

Commit 1fb09ad

Browse files
committed
[VPlan] Hoist loads with invariant addresses using scoped noalias metadata.
This patch implements a transform to hoist single-scalar replicated loads with invariant addresses out of the vector loop to the preheader when scoped noalias metadata proves they cannot alias with any stores in the loop. This enables hoisting of loads we can prove do not alias any stores in the loop due to memory runtime checks added during vectorization.
1 parent e3cfb17 commit 1fb09ad

22 files changed

+215
-134
lines changed

llvm/include/llvm/Analysis/ScopedNoAliasAA.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ class ScopedNoAliasAAResult : public AAResultBase {
4646
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
4747
const CallBase *Call2, AAQueryInfo &AAQI);
4848

49-
LLVM_ABI void
49+
LLVM_ABI static void
5050
collectScopedDomains(const MDNode *NoAlias,
51-
SmallPtrSetImpl<const MDNode *> &Domains) const;
51+
SmallPtrSetImpl<const MDNode *> &Domains);
5252

53-
private:
54-
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
53+
LLVM_ABI static bool mayAliasInScopes(const MDNode *Scopes,
54+
const MDNode *NoAlias);
5555
};
5656

5757
/// Analysis pass providing a never-invalidated alias analysis result.

llvm/lib/Analysis/ScopedNoAliasAA.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
116116

117117
/// Collect the set of scoped domains relevant to the noalias scopes.
118118
void ScopedNoAliasAAResult::collectScopedDomains(
119-
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
119+
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) {
120120
if (!NoAlias)
121121
return;
122122
assert(Domains.empty() && "Domains should be empty");
@@ -127,7 +127,7 @@ void ScopedNoAliasAAResult::collectScopedDomains(
127127
}
128128

129129
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
130-
const MDNode *NoAlias) const {
130+
const MDNode *NoAlias) {
131131
if (!Scopes || !NoAlias)
132132
return true;
133133

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/ADT/ilist.h"
3333
#include "llvm/ADT/ilist_node.h"
3434
#include "llvm/Analysis/IVDescriptors.h"
35+
#include "llvm/Analysis/MemoryLocation.h"
3536
#include "llvm/Analysis/VectorUtils.h"
3637
#include "llvm/IR/DebugLoc.h"
3738
#include "llvm/IR/FMF.h"
@@ -983,6 +984,13 @@ class VPIRMetadata {
983984
/// Intersect this VPIRMetadata object with \p MD, keeping only metadata
984985
/// nodes that are common to both.
985986
void intersect(const VPIRMetadata &MD);
987+
988+
/// Get metadata of kind \p Kind. Returns nullptr if not found.
989+
MDNode *getMetadata(unsigned Kind) const {
990+
auto It = llvm::find_if(Metadata,
991+
[Kind](const auto &P) { return P.first == Kind; });
992+
return It != Metadata.end() ? It->second : nullptr;
993+
}
986994
};
987995

988996
/// This is a concrete Recipe that models a single VPlan-level instruction.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,20 @@
2424
#include "llvm/ADT/APInt.h"
2525
#include "llvm/ADT/PostOrderIterator.h"
2626
#include "llvm/ADT/STLExtras.h"
27+
#include "llvm/ADT/SetOperations.h"
2728
#include "llvm/ADT/SetVector.h"
29+
#include "llvm/ADT/SmallPtrSet.h"
2830
#include "llvm/ADT/TypeSwitch.h"
2931
#include "llvm/Analysis/IVDescriptors.h"
3032
#include "llvm/Analysis/InstSimplifyFolder.h"
3133
#include "llvm/Analysis/LoopInfo.h"
34+
#include "llvm/Analysis/MemoryLocation.h"
3235
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
36+
#include "llvm/Analysis/ScopedNoAliasAA.h"
3337
#include "llvm/Analysis/VectorUtils.h"
3438
#include "llvm/IR/Intrinsics.h"
3539
#include "llvm/IR/MDBuilder.h"
40+
#include "llvm/IR/Metadata.h"
3641
#include "llvm/Support/Casting.h"
3742
#include "llvm/Support/TypeSize.h"
3843
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -2389,6 +2394,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
23892394
runPass(removeDeadRecipes, Plan);
23902395

23912396
runPass(createAndOptimizeReplicateRegions, Plan);
2397+
runPass(hoistInvariantLoads, Plan);
23922398
runPass(mergeBlocksIntoPredecessors, Plan);
23932399
runPass(licm, Plan);
23942400
}
@@ -3904,6 +3910,54 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
39043910
}
39053911
}
39063912

3913+
// Hoist single-scalar replicated loads with loop-invariant addresses from the
// vector loop region to the vector preheader when scoped noalias metadata
// shows they cannot alias any memory-writing recipe collected from the loop.
//
// Nothing is hoisted if a memory-writing recipe is found for which no
// MemoryLocation can be built, or whose location lacks either alias-scope or
// noalias metadata.
void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();

  // Collect candidate loads with invariant addresses and noalias scopes
  // metadata and memory-writing recipes with noalias metadata.
  SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
  SmallVector<MemoryLocation> Stores;
  // NOTE(review): the region is walked with vp_depth_first_shallow; confirm
  // whether writes located in nested (replicate) regions are visited here,
  // because any write that is not collected is not checked below.
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_shallow(LoopRegion->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      // Only unpredicated, single-scalar replicated loads with an address
      // defined outside the loop regions and with alias-scope metadata are
      // candidates. Deliberately no `continue` for non-candidates: the write
      // check below must also run for replicate recipes (e.g. replicated
      // stores), otherwise such writes would be silently ignored.
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (RepR && !RepR->isPredicated() && RepR->isSingleScalar() &&
          RepR->getOpcode() == Instruction::Load &&
          RepR->getOperand(0)->isDefinedOutsideLoopRegions()) {
        auto LoadLoc = vputils::getMemoryLocation(*RepR);
        if (LoadLoc && LoadLoc->AATags.Scope)
          CandidateLoads.push_back({RepR, *LoadLoc});
      }

      if (!R.mayWriteToMemory())
        continue;

      // A write we cannot reason about via scoped noalias metadata makes
      // hoisting unsafe for every candidate, so give up on the whole plan.
      auto StoreLoc = vputils::getMemoryLocation(R);
      if (!StoreLoc || !StoreLoc->AATags.Scope || !StoreLoc->AATags.NoAlias)
        return;
      Stores.push_back(*StoreLoc);
    }
  }

  VPBasicBlock *Preheader = Plan.getVectorPreheader();
  for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
    // Hoist the load to the preheader if it doesn't alias with any stores
    // according to the noalias metadata. Other loads should have been hoisted
    // by other passes.
    const AAMDNodes &LoadAA = LoadLoc.AATags;
    if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
          return !ScopedNoAliasAAResult::mayAliasInScopes(
              LoadAA.Scope, StoreLoc.AATags.NoAlias);
        })) {
      // Each hoisted load is placed at the first non-phi position of the
      // preheader, ahead of loads hoisted earlier in this loop.
      LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
    }
  }
}
3960+
39073961
void VPlanTransforms::materializeConstantVectorTripCount(
39083962
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
39093963
PredicatedScalarEvolution &PSE) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,11 @@ struct VPlanTransforms {
308308
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
309309
static void materializeBroadcasts(VPlan &Plan);
310310

311+
/// Hoist single-scalar loads with invariant addresses out of the vector loop
312+
/// to the preheader, if they are proven not to alias with any stores in the
313+
/// plan using noalias metadata.
314+
static void hoistInvariantLoads(VPlan &Plan);
315+
311316
// Materialize vector trip counts for constants early if it can simply be
312317
// computed as (Original TC / VF * UF) * VF * UF.
313318
static void

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "VPlanDominatorTree.h"
1212
#include "VPlanPatternMatch.h"
1313
#include "llvm/ADT/TypeSwitch.h"
14+
#include "llvm/Analysis/MemoryLocation.h"
1415
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
1516

1617
using namespace llvm;
@@ -393,3 +394,20 @@ bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
393394
return VPB->getNumSuccessors() == 2 &&
394395
VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
395396
}
397+
398+
std::optional<MemoryLocation>
399+
vputils::getMemoryLocation(const VPRecipeBase &R) {
400+
return TypeSwitch<const VPRecipeBase *, std::optional<MemoryLocation>>(&R)
401+
.Case<VPWidenStoreRecipe, VPInterleaveBase, VPReplicateRecipe>(
402+
[](auto *S) {
403+
MemoryLocation Loc;
404+
// Populate noalias metadata from VPIRMetadata.
405+
if (MDNode *NoAliasMD = S->getMetadata(LLVMContext::MD_noalias))
406+
Loc.AATags.NoAlias = NoAliasMD;
407+
if (MDNode *AliasScopeMD =
408+
S->getMetadata(LLVMContext::MD_alias_scope))
409+
Loc.AATags.Scope = AliasScopeMD;
410+
return Loc;
411+
})
412+
.Default([](auto *) { return std::nullopt; });
413+
}

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/Support/Compiler.h"
1414

1515
namespace llvm {
16+
class MemoryLocation;
1617
class ScalarEvolution;
1718
class SCEV;
1819
} // namespace llvm
@@ -73,6 +74,10 @@ std::optional<VPValue *>
7374
getRecipesForUncountableExit(VPlan &Plan,
7475
SmallVectorImpl<VPRecipeBase *> &Recipes,
7576
SmallVectorImpl<VPRecipeBase *> &GEPs);
77+
78+
/// Return a MemoryLocation for \p R with noalias metadata populated from
79+
/// \p R. The pointer of the location is conservatively set to nullptr.
80+
std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R);
7681
} // namespace vputils
7782

7883
//===----------------------------------------------------------------------===//

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
386386
; DEFAULT-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
387387
; DEFAULT-NEXT: [[ENTRY:.*:]]
388388
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
389-
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 60
389+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 28
390390
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
391391
; DEFAULT: [[VECTOR_MEMCHECK]]:
392392
; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
@@ -427,16 +427,16 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
427427
; DEFAULT: [[VECTOR_PH]]:
428428
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
429429
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
430-
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
431-
; DEFAULT: [[VECTOR_BODY]]:
432-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
433-
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
430+
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META8:![0-9]+]]
434431
; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
432+
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META13:![0-9]+]]
435433
; DEFAULT-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
436-
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
437434
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP5]]
438435
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
439436
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
437+
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
438+
; DEFAULT: [[VECTOR_BODY]]:
439+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
440440
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
441441
; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
442442
; DEFAULT: [[PRED_STORE_IF]]:

llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
132132
; DEFAULT: vector.ph:
133133
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
134134
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
135-
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
136-
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
137-
; DEFAULT: vector.body:
138-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
139135
; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
140136
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
141137
; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer
142138
; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8>
139+
; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
143140
; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]]
141+
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
142+
; DEFAULT: vector.body:
143+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
144144
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
145145
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
146146
; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
@@ -156,15 +156,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
156156
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
157157
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
158158
; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
159-
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
160-
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
161-
; DEFAULT: vec.epilog.vector.body:
162-
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
163159
; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
164160
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0
165161
; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
166162
; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
163+
; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
167164
; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]]
165+
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
166+
; DEFAULT: vec.epilog.vector.body:
167+
; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
168168
; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
169169
; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]]
170170
; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8

llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) {
1717
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1818
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1919
; CHECK: [[VECTOR_PH]]:
20+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
21+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
22+
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2023
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
2124
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
2225
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2326
; CHECK: [[VECTOR_BODY]]:
2427
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
2528
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
26-
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
27-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP6]], i64 0
28-
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2929
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT3]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
3030
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
3131
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]

0 commit comments

Comments
 (0)