-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Polly] Add vectorize metadata to loops identified as vectorizable by polly #113994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
337ef36
374788e
fd61d16
075a778
fe01b97
c368310
92703e2
7a51d22
1a07e39
2e84b67
9771c7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -54,6 +54,11 @@ static cl::opt<bool> Verify("polly-codegen-verify", | |||||||
| cl::desc("Verify the function generated by Polly"), | ||||||||
| cl::Hidden, cl::cat(PollyCategory)); | ||||||||
|
|
||||||||
| cl::opt<bool> PollyVectorizeMetadata( | ||||||||
| "polly-annotate-metadata-vectorize", | ||||||||
| cl::desc("Append vectorize enable/disable metadata from polly"), | ||||||||
| cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); | ||||||||
|
|
||||||||
| bool polly::PerfMonitoring; | ||||||||
|
|
||||||||
| static cl::opt<bool, true> | ||||||||
|
|
@@ -233,6 +238,26 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI, | |||||||
| NodeBuilder.allocateNewArrays(StartExitBlocks); | ||||||||
| Annotator.buildAliasScopes(S); | ||||||||
|
|
||||||||
| // The code below annotates the "llvm.loop.vectorize.enable" to false | ||||||||
| // for the code flow taken when RTCs fail. Because we don't want the | ||||||||
| // Loop Vectorizer to come in later and vectorize the original fall back | ||||||||
| // loop when 'polly-annotate-metadata-vectorize' is passed. | ||||||||
| if (PollyVectorizeMetadata && &Annotator) { | ||||||||
kartcq marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||
| for (Loop *L : LI.getLoopsInPreorder()) { | ||||||||
| if (S.contains(L)) { | ||||||||
|
||||||||
| if (S.contains(L)) { | |
| if (!S.contains(L)) | |
| continue; |
kartcq marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not directly calling addVectorizeMetadata here? The ScopAnnotator was design with the assumption to be called on generated code only. Using it outside of it may result in unexpected situations.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -128,8 +128,25 @@ void ScopAnnotator::popLoop(bool IsParallel) { | |||||
| LoopAttrEnv.pop_back(); | ||||||
| } | ||||||
|
|
||||||
| void addVectorizeMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args, | ||||||
| bool EnableLoopVectorizer) { | ||||||
| MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable"); | ||||||
| ConstantInt *Value = | ||||||
| ConstantInt::get(Type::getInt1Ty(Ctx), EnableLoopVectorizer); | ||||||
| ValueAsMetadata *PropValue = ValueAsMetadata::get(Value); | ||||||
| Args->push_back(MDNode::get(Ctx, {PropName, PropValue})); | ||||||
| } | ||||||
|
|
||||||
| void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args, | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops) { | ||||||
| MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses"); | ||||||
| MDNode *AccGroup = ParallelLoops.back(); | ||||||
| Args->push_back(MDNode::get(Ctx, {PropName, AccGroup})); | ||||||
| } | ||||||
|
|
||||||
| void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel, | ||||||
| bool IsLoopVectorizerDisabled) const { | ||||||
| bool setVectorizeMetadata, | ||||||
|
||||||
| bool setVectorizeMetadata, | |
| bool SetVectorizeMetadata, |
Could add a doxygen comment on what the parameters mean?
I am getting confused with the flag combinations. Since its terrnary logic, consider std::optional<bool> EnableLoopVectorizer where std::nullopt means no metadata set.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |||||
| #include "polly/CodeGen/LoopGenerators.h" | ||||||
| #include "polly/Options.h" | ||||||
| #include "polly/ScopDetection.h" | ||||||
| #include "polly/ScopInfo.h" | ||||||
| #include "llvm/Analysis/LoopInfo.h" | ||||||
| #include "llvm/IR/DataLayout.h" | ||||||
| #include "llvm/IR/DebugInfoMetadata.h" | ||||||
|
|
@@ -35,6 +36,8 @@ static cl::opt<int, true> | |||||
| cl::Hidden, cl::location(polly::PollyNumThreads), | ||||||
| cl::init(0), cl::cat(PollyCategory)); | ||||||
|
|
||||||
| extern cl::opt<bool> PollyVectorizeMetadata; | ||||||
|
|
||||||
| static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( | ||||||
| "polly-scheduling", | ||||||
| cl::desc("Scheduling type of parallel OpenMP for loops"), | ||||||
|
|
@@ -159,8 +162,19 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, | |||||
|
|
||||||
| // Create the loop latch and annotate it as such. | ||||||
| BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); | ||||||
| if (Annotator) | ||||||
| Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); | ||||||
|
|
||||||
| // If the 'polly-annotate-metadata-vectorize' flag is passed, we add | ||||||
| // the vectorize metadata. Otherwise we fall back to previous behavior | ||||||
|
||||||
| // of annotating the loop only when LoopVectDisabled is true. | ||||||
| if (Annotator) { | ||||||
| if (PollyVectorizeMetadata) | ||||||
| Annotator->annotateLoopLatch(B, NewLoop, Parallel, true, | ||||||
| !LoopVectDisabled); | ||||||
| else if (LoopVectDisabled) | ||||||
| Annotator->annotateLoopLatch(B, NewLoop, Parallel, true, false); | ||||||
|
||||||
| Annotator->annotateLoopLatch(B, NewLoop, Parallel, true, false); | |
| Annotator->annotateLoopLatch(B, NewLoop, Parallel, /*SetVectorizeMetadata=*/true, /*EnableLoopVectorizer=*/false); |
but why not:
if (Annotator)
Annotator->annotateLoopLatch(B, NewLoop, Parallel, /*SetVectorizeMetadata=*/PollyVectorizeMetadata||LoopVectDisabled, /*EnableLoopVectorizer=*/!LoopVectDisabled&&!PollyVectorizeMetadata);or at least
else
Annotator->annotateLoopLatch(B, NewLoop, Parallel, /*SetVectorizeMetadata=*/LoopVectDisabled, /*EnableLoopVectorizer=*/false);like before this commit
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| ; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s | ||
|
|
||
| ; Basic verification of vectorize metadata getting added when "-polly-vectorize-metadata" is | ||
| ; passed. | ||
|
|
||
| ; void add(int *A, int *B, int *C,int n) { | ||
| ; for(int i=0; i<n; i++) | ||
| ; C[i] += A[i] + B[i]; | ||
| ; } | ||
|
|
||
| ; CHECK: for.body: | ||
| ; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] | ||
| ; CHECK: polly.stmt.for.body: | ||
| ; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]] | ||
| ; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]]} | ||
| ; CHECK: [[META2]] = !{!"llvm.loop.vectorize.enable", i1 false} | ||
| ; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]} | ||
| ; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true} | ||
|
|
||
| target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" | ||
| target triple = "aarch64-unknown-linux-gnu" | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable | ||
| define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 { | ||
| entry: | ||
| br label %entry.split | ||
|
|
||
| entry.split: ; preds = %entry | ||
| %cmp10 = icmp sgt i32 %n, 0 | ||
| br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup | ||
|
|
||
| for.body.preheader: ; preds = %entry.split | ||
| %wide.trip.count = zext nneg i32 %n to i64 | ||
| br label %for.body | ||
|
|
||
| for.cond.cleanup.loopexit: ; preds = %for.body | ||
| br label %for.cond.cleanup | ||
|
|
||
| for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry.split | ||
| ret void | ||
|
|
||
| for.body: ; preds = %for.body.preheader, %for.body | ||
| %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] | ||
| %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv | ||
| %0 = load i32, ptr %arrayidx, align 4 | ||
| %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv | ||
| %1 = load i32, ptr %arrayidx2, align 4 | ||
| %add = add nsw i32 %1, %0 | ||
| %arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv | ||
| %2 = load i32, ptr %arrayidx4, align 4 | ||
| %add5 = add nsw i32 %add, %2 | ||
| store i32 %add5, ptr %arrayidx4, align 4 | ||
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
| %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count | ||
| br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0 | ||
| } | ||
|
|
||
| attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" } | ||
|
|
||
| !0 = distinct !{!0, !1} | ||
| !1 = !{!"llvm.loop.mustprogress"} |
Uh oh!
There was an error while loading. Please reload this page.