-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Polly] Add vectorize metadata to loops identified as vectorizable by polly #113994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
337ef36
374788e
fd61d16
075a778
fe01b97
c368310
92703e2
7a51d22
1a07e39
2e84b67
9771c7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -54,6 +54,11 @@ static cl::opt<bool> Verify("polly-codegen-verify", | |||||||||||||||||||||||||||||||||||||||||||||||||||||
| cl::desc("Verify the function generated by Polly"), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cl::Hidden, cl::cat(PollyCategory)); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cl::opt<bool> PollyVectorizeMetadata( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| "polly-annotate-metadata-vectorize", | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cl::desc("Append vectorize enable/disable metadata from polly"), | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| bool polly::PerfMonitoring; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| static cl::opt<bool, true> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -233,6 +238,30 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI, | |||||||||||||||||||||||||||||||||||||||||||||||||||||
| NodeBuilder.allocateNewArrays(StartExitBlocks); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Annotator.buildAliasScopes(S); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // The code below annotates the "llvm.loop.vectorize.enable" to false | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // for the code flow taken when RTCs fail. Because we don't want the | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // Loop Vectorizer to come in later and vectorize the original fall back | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // loop when 'polly-annotate-metadata-vectorize' is passed. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (PollyVectorizeMetadata) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| LLVMContext &Ctx = S.getFunction().getContext(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| for (Loop *L : LI.getLoopsInPreorder()) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (!L || !S.contains(L)) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| continue; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| MDNode *LoopID = L->getLoopID(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| SmallVector<Metadata *, 3> Args; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if (LoopID) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| for (unsigned i = 0, e = LoopID->getNumOperands(); i != e; ++i) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Args.push_back(LoopID->getOperand(i)); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| else | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Args.push_back(nullptr); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Annotator.addVectorizeMetadata(Ctx, &Args, false); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| MDNode *NewLoopID = MDNode::get(Ctx, Args); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| NewLoopID->replaceOperandWith(0, NewLoopID); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| L->setLoopID(NewLoopID); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||
| LLVMContext &Ctx = S.getFunction().getContext(); | |
| for (Loop *L : LI.getLoopsInPreorder()) { | |
| if (!L || !S.contains(L)) | |
| continue; | |
| MDNode *LoopID = L->getLoopID(); | |
| SmallVector<Metadata *, 3> Args; | |
| if (LoopID) | |
| for (unsigned i = 0, e = LoopID->getNumOperands(); i != e; ++i) | |
| Args.push_back(LoopID->getOperand(i)); | |
| else | |
| Args.push_back(nullptr); | |
| Annotator.addVectorizeMetadata(Ctx, &Args, false); | |
| MDNode *NewLoopID = MDNode::get(Ctx, Args); | |
| NewLoopID->replaceOperandWith(0, NewLoopID); | |
| L->setLoopID(NewLoopID); | |
| } | |
| #include "llvm/Transforms/Utils/LoopUtils.h" | |
| LLVMContext &Ctx = S.getFunction().getContext(); | |
| for (Loop *L : LI.getLoopsInPreorder()) { | |
| if (!S.contains(L)) | |
| continue; | |
| addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0) | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A small query here ..
Using addStringMetadataToLoop API makes the value get set as i32.
Something like
!2 = !{!"llvm.loop.vectorize.enable", i32 0}
Though the behavior is same, Is it okay, as the value should be i1 according to LangRef.rst?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does not make a difference:
| unsigned Val = C->getZExtValue(); |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -128,8 +128,28 @@ void ScopAnnotator::popLoop(bool IsParallel) { | |||||
| LoopAttrEnv.pop_back(); | ||||||
| } | ||||||
|
|
||||||
| void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel, | ||||||
| bool IsLoopVectorizerDisabled) const { | ||||||
| void ScopAnnotator::addVectorizeMetadata(LLVMContext &Ctx, | ||||||
|
||||||
| void ScopAnnotator::addVectorizeMetadata(LLVMContext &Ctx, | |
| static void addVectorizeMetadata(LLVMContext &Ctx, |
AFICS this does not use any ScopAnnotator members
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args, | |
| static void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args, |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interface descriptions usually go to the declaration in the header files as Doxygen comment (/// @param EnableVectorizeMetadata If no value is passed, we don't annotate any vectorize metadata.)
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |||||||||||||||||||||||||||
| #include "polly/CodeGen/LoopGenerators.h" | ||||||||||||||||||||||||||||
| #include "polly/Options.h" | ||||||||||||||||||||||||||||
| #include "polly/ScopDetection.h" | ||||||||||||||||||||||||||||
| #include "polly/ScopInfo.h" | ||||||||||||||||||||||||||||
| #include "llvm/Analysis/LoopInfo.h" | ||||||||||||||||||||||||||||
| #include "llvm/IR/DataLayout.h" | ||||||||||||||||||||||||||||
| #include "llvm/IR/DebugInfoMetadata.h" | ||||||||||||||||||||||||||||
|
|
@@ -35,6 +36,8 @@ static cl::opt<int, true> | |||||||||||||||||||||||||||
| cl::Hidden, cl::location(polly::PollyNumThreads), | ||||||||||||||||||||||||||||
| cl::init(0), cl::cat(PollyCategory)); | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| extern cl::opt<bool> PollyVectorizeMetadata; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( | ||||||||||||||||||||||||||||
| "polly-scheduling", | ||||||||||||||||||||||||||||
| cl::desc("Scheduling type of parallel OpenMP for loops"), | ||||||||||||||||||||||||||||
|
|
@@ -159,8 +162,20 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, | |||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Create the loop latch and annotate it as such. | ||||||||||||||||||||||||||||
| BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); | ||||||||||||||||||||||||||||
| if (Annotator) | ||||||||||||||||||||||||||||
| Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Don't annotate vectorize metadata when both LoopVectDisabled and | ||||||||||||||||||||||||||||
| // PollyVectorizeMetadata are disabled. Annotate vectorize metadata to false | ||||||||||||||||||||||||||||
| // when LoopVectDisabled is true. Otherwise we annotate the vectorize metadata | ||||||||||||||||||||||||||||
| // to true. | ||||||||||||||||||||||||||||
| if (Annotator) { | ||||||||||||||||||||||||||||
| if (!LoopVectDisabled && !PollyVectorizeMetadata) | ||||||||||||||||||||||||||||
| Annotator->annotateLoopLatch(B, Parallel); | ||||||||||||||||||||||||||||
| else | ||||||||||||||||||||||||||||
| Annotator->annotateLoopLatch( | ||||||||||||||||||||||||||||
| B, Parallel, | ||||||||||||||||||||||||||||
| /*EnableVectorizeMetadata*/ !LoopVectDisabled && | ||||||||||||||||||||||||||||
| PollyVectorizeMetadata); | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
| if (!LoopVectDisabled && !PollyVectorizeMetadata) | |
| Annotator->annotateLoopLatch(B, Parallel); | |
| else | |
| Annotator->annotateLoopLatch( | |
| B, Parallel, | |
| /*EnableVectorizeMetadata*/ !LoopVectDisabled && | |
| PollyVectorizeMetadata); | |
| std::optional<bool> EnableVectorizeMetadata; | |
| if (LoopVectDisabled) | |
| EnableVectorizeMetadata = false; | |
| else if (PollyVectorizeMetadata) | |
| EnableVectorizeMetadata = true; | |
| Annotator->annotateLoopLatch(B, Parallel, EnableVectorizeMetadata); |
If I got the boolean logic correct
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| ; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s | ||
|
|
||
| ; Basic verification of vectorize metadata getting added when "-polly-vectorize-metadata" is | ||
| ; passed. | ||
|
|
||
| ; void add(int *A, int *B, int *C,int n) { | ||
| ; for(int i=0; i<n; i++) | ||
| ; C[i] += A[i] + B[i]; | ||
| ; } | ||
|
|
||
| ; CHECK: for.body: | ||
| ; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] | ||
| ; CHECK: polly.stmt.for.body: | ||
| ; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]] | ||
| ; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]} | ||
| ; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 false} | ||
| ; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]} | ||
| ; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true} | ||
|
|
||
| target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" | ||
| target triple = "aarch64-unknown-linux-gnu" | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable | ||
| define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 { | ||
| entry: | ||
| br label %entry.split | ||
|
|
||
| entry.split: ; preds = %entry | ||
| %cmp10 = icmp sgt i32 %n, 0 | ||
| br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup | ||
|
|
||
| for.body.preheader: ; preds = %entry.split | ||
| %wide.trip.count = zext nneg i32 %n to i64 | ||
| br label %for.body | ||
|
|
||
| for.cond.cleanup.loopexit: ; preds = %for.body | ||
| br label %for.cond.cleanup | ||
|
|
||
| for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry.split | ||
| ret void | ||
|
|
||
| for.body: ; preds = %for.body.preheader, %for.body | ||
| %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] | ||
| %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv | ||
| %0 = load i32, ptr %arrayidx, align 4 | ||
| %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv | ||
| %1 = load i32, ptr %arrayidx2, align 4 | ||
| %add = add nsw i32 %1, %0 | ||
| %arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv | ||
| %2 = load i32, ptr %arrayidx4, align 4 | ||
| %add5 = add nsw i32 %add, %2 | ||
| store i32 %add5, ptr %arrayidx4, align 4 | ||
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
| %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count | ||
| br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0 | ||
| } | ||
|
|
||
| attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" } | ||
|
|
||
| !0 = distinct !{!0, !1} | ||
| !1 = !{!"llvm.loop.mustprogress"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd be fine if the loop vectorizer is always disabled for fallback code. Would it means too many test updates?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am seeing around 19 failures.
Should we have it as separate patch.
Or can it be part of this patch itself ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Meinersbur Please provide inputs on if we have to add the test case changes for 19 failures as separate patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can add the test changes into PR. If that's too much work I'd accept the
-polly-annotate-metadata-vectorizeopt-in as well.