Skip to content
3 changes: 2 additions & 1 deletion polly/include/polly/CodeGen/IRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ class ScopAnnotator {

/// Annotate the loop latch @p B wrt. @p L.
void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel,
bool IsLoopVectorizerDisabled) const;
bool setVectorizeMetadata,
bool EnableLoopVectorizer) const;

/// Add alternative alias based pointers
///
Expand Down
25 changes: 25 additions & 0 deletions polly/lib/CodeGen/CodeGeneration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ static cl::opt<bool> Verify("polly-codegen-verify",
cl::desc("Verify the function generated by Polly"),
cl::Hidden, cl::cat(PollyCategory));

// Command-line switch "-polly-annotate-metadata-vectorize" (off by default).
// It is intentionally not declared `static`: LoopGenerators.cpp refers to it
// through an `extern cl::opt<bool>` declaration.
cl::opt<bool> PollyVectorizeMetadata(
"polly-annotate-metadata-vectorize",
cl::desc("Append vectorize enable/disable metadata from polly"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

bool polly::PerfMonitoring;

static cl::opt<bool, true>
Expand Down Expand Up @@ -233,6 +238,26 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);

// When 'polly-annotate-metadata-vectorize' is passed, the code below sets
// "llvm.loop.vectorize.enable" to false on the original loops, i.e. the
// fallback code path taken when the RTCs fail, so that the Loop Vectorizer
// does not come in later and vectorize the original fallback loop.
if (PollyVectorizeMetadata && &Annotator) {
for (Loop *L : LI.getLoopsInPreorder()) {
if (S.contains(L)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (S.contains(L)) {
if (!S.contains(L))
continue;

Coding standard

Annotator.pushLoop(L, false);
SmallVector<BasicBlock *, 4> LoopLatchBlocks;
L->getLoopLatches(LoopLatchBlocks);
for (BasicBlock *ControlBB : LoopLatchBlocks) {
BranchInst *Br = dyn_cast<BranchInst>(ControlBB->getTerminator());
if (Br)
Annotator.annotateLoopLatch(Br, L, false, true, false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not call addVectorizeMetadata directly here? The ScopAnnotator was designed with the assumption that it is called on generated code only. Using it outside of that may result in unexpected situations.

}
Annotator.popLoop(false);
}
}
}

if (PerfMonitoring) {
PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
Expand Down
36 changes: 22 additions & 14 deletions polly/lib/CodeGen/IRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,25 @@ void ScopAnnotator::popLoop(bool IsParallel) {
LoopAttrEnv.pop_back();
}

/// Append an "llvm.loop.vectorize.enable" property node to a loop-metadata
/// operand list.
///
/// @param Ctx                  Context in which the metadata is created.
/// @param Args                 Operand list that receives the new node.
/// @param EnableLoopVectorizer Boolean stored as the i1 value of the property.
void addVectorizeMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
                          bool EnableLoopVectorizer) {
  // Property key first, then its i1 value, in the same order as the operands
  // of the resulting node.
  Metadata *Name = MDString::get(Ctx, "llvm.loop.vectorize.enable");
  Metadata *Flag = ValueAsMetadata::get(
      ConstantInt::get(Type::getInt1Ty(Ctx), EnableLoopVectorizer));
  Args->push_back(MDNode::get(Ctx, {Name, Flag}));
}

void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
static void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,

llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
MDNode *AccGroup = ParallelLoops.back();
Args->push_back(MDNode::get(Ctx, {PropName, AccGroup}));
}

void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
bool IsLoopVectorizerDisabled) const {
bool setVectorizeMetadata,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool setVectorizeMetadata,
bool SetVectorizeMetadata,

Could add a doxygen comment on what the parameters mean?

I am getting confused by the flag combinations. Since it's ternary logic, consider std::optional<bool> EnableLoopVectorizer where std::nullopt means no metadata is set.

bool EnableLoopVectorizer) const {
LLVMContext &Ctx = SE->getContext();
SmallVector<Metadata *, 3> Args;

Expand All @@ -145,19 +162,10 @@ void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
if (MData)
llvm::append_range(Args, drop_begin(MData->operands(), 1));
}

if (IsLoopVectorizerDisabled) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
}

if (IsParallel) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
MDNode *AccGroup = ParallelLoops.back();
Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
}
if (IsParallel)
addParallelMetadata(Ctx, &Args, ParallelLoops);
if (setVectorizeMetadata)
addVectorizeMetadata(Ctx, &Args, EnableLoopVectorizer);

// No metadata to annotate.
if (!MData && Args.size() <= 1)
Expand Down
18 changes: 16 additions & 2 deletions polly/lib/CodeGen/LoopGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "polly/CodeGen/LoopGenerators.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
Expand All @@ -35,6 +36,8 @@ static cl::opt<int, true>
cl::Hidden, cl::location(polly::PollyNumThreads),
cl::init(0), cl::cat(PollyCategory));

extern cl::opt<bool> PollyVectorizeMetadata;

static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
"polly-scheduling",
cl::desc("Scheduling type of parallel OpenMP for loops"),
Expand Down Expand Up @@ -159,8 +162,19 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,

// Create the loop latch and annotate it as such.
BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
if (Annotator)
Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);

// If the 'polly-annotate-metadata-vectorize' flag is passed, we add
// the vectorize metadata. Otherwise we fall back to previous behavior
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"previous behavior" refers to this commit. That does not make sense if you see this comment only in the source.

// of annotating the loop only when LoopVectDisabled is true.
if (Annotator) {
if (PollyVectorizeMetadata)
Annotator->annotateLoopLatch(B, NewLoop, Parallel, true,
!LoopVectDisabled);
else if (LoopVectDisabled)
Annotator->annotateLoopLatch(B, NewLoop, Parallel, true, false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Annotator->annotateLoopLatch(B, NewLoop, Parallel, true, false);
Annotator->annotateLoopLatch(B, NewLoop, Parallel, /*SetVectorizeMetadata=*/true, /*EnableLoopVectorizer=*/false);

but why not:

if (Annotator)
   Annotator->annotateLoopLatch(B, NewLoop, Parallel, /*SetVectorizeMetadata=*/PollyVectorizeMetadata||LoopVectDisabled,  /*EnableLoopVectorizer=*/!LoopVectDisabled&&!PollyVectorizeMetadata);

or at least

else
   Annotator->annotateLoopLatch(B, NewLoop, Parallel, /*SetVectorizeMetadata=*/LoopVectDisabled,  /*EnableLoopVectorizer=*/false);

like before this commit

else
Annotator->annotateLoopLatch(B, NewLoop, Parallel, false, false);
}

IV->addIncoming(IncrementedIV, HeaderBB);
if (GuardBB)
Expand Down
61 changes: 61 additions & 0 deletions polly/test/CodeGen/Metadata/basic_vec_annotate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s

; Basic verification of vectorize metadata getting added when
; "-polly-annotate-metadata-vectorize" is passed.

; void add(int *A, int *B, int *C,int n) {
; for(int i=0; i<n; i++)
; C[i] += A[i] + B[i];
; }

; CHECK: for.body:
; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
; CHECK: polly.stmt.for.body:
; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]]
; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]]}
; CHECK: [[META2]] = !{!"llvm.loop.vectorize.enable", i1 false}
; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true}

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"

; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
; Element-wise update C[i] += A[i] + B[i] for i in [0, n). The only loop is
; %for.body, whose latch branch carries the loop metadata !0.
define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 {
entry:
br label %entry.split

; Guard: the loop is only entered when n > 0.
entry.split: ; preds = %entry
%cmp10 = icmp sgt i32 %n, 0
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup

; Widen the trip count to i64 once, outside the loop.
for.body.preheader: ; preds = %entry.split
%wide.trip.count = zext nneg i32 %n to i64
br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry.split
ret void

; Loop body: load A[i], B[i] and C[i], sum them, and store the result to C[i].
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%1 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %1, %0
%arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
%2 = load i32, ptr %arrayidx4, align 4
%add5 = add nsw i32 %add, %2
store i32 %add5, ptr %arrayidx4, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
; Latch: leave the loop once the incremented index equals the trip count.
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0
}

attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" }

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.mustprogress"}
Loading