Skip to content
9 changes: 6 additions & 3 deletions polly/include/polly/CodeGen/IRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,12 @@ class ScopAnnotator {
/// Annotate the new instruction @p I for all parallel loops.
void annotate(llvm::Instruction *I);

/// Annotate the loop latch @p B wrt. @p L.
void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel,
bool IsLoopVectorizerDisabled) const;
/// Annotate the loop latch @p B.
/// The last argument is optional: if no value is passed, no vectorize
/// metadata is added to the loop.
void annotateLoopLatch(
llvm::BranchInst *B, bool IsParallel,
std::optional<bool> EnableVectorizeMetadata = std::nullopt) const;

/// Add alternative alias based pointers
///
Expand Down
39 changes: 24 additions & 15 deletions polly/lib/CodeGen/IRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,26 @@ void ScopAnnotator::popLoop(bool IsParallel) {
LoopAttrEnv.pop_back();
}

void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
bool IsLoopVectorizerDisabled) const {
/// Append an "llvm.loop.vectorize.enable" property node to @p Args.
///
/// The property value is an i1 constant holding @p EnableLoopVectorizer.
static void addVectorizeMetadata(LLVMContext &Ctx,
                                 SmallVector<Metadata *, 3> *Args,
                                 bool EnableLoopVectorizer) {
  auto *BoolTy = Type::getInt1Ty(Ctx);
  // Operands of the property node: the property name followed by its value.
  Metadata *PropOperands[] = {
      MDString::get(Ctx, "llvm.loop.vectorize.enable"),
      ValueAsMetadata::get(ConstantInt::get(BoolTy, EnableLoopVectorizer))};
  Args->push_back(MDNode::get(Ctx, PropOperands));
}

void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
static void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,

llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
MDNode *AccGroup = ParallelLoops.back();
Args->push_back(MDNode::get(Ctx, {PropName, AccGroup}));
}

void ScopAnnotator::annotateLoopLatch(
BranchInst *B, bool IsParallel,
std::optional<bool> EnableVectorizeMetadata) const {
LLVMContext &Ctx = SE->getContext();
SmallVector<Metadata *, 3> Args;

Expand All @@ -145,19 +163,10 @@ void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
if (MData)
llvm::append_range(Args, drop_begin(MData->operands(), 1));
}

if (IsLoopVectorizerDisabled) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
}

if (IsParallel) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
MDNode *AccGroup = ParallelLoops.back();
Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
}
if (IsParallel)
addParallelMetadata(Ctx, &Args, ParallelLoops);
if (EnableVectorizeMetadata.has_value())
addVectorizeMetadata(Ctx, &Args, *EnableVectorizeMetadata);

// No metadata to annotate.
if (!MData && Args.size() <= 1)
Expand Down
20 changes: 18 additions & 2 deletions polly/lib/CodeGen/LoopGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ static cl::opt<int, true>
cl::Hidden, cl::location(polly::PollyNumThreads),
cl::init(0), cl::cat(PollyCategory));

// Command-line flag "-polly-annotate-metadata-vectorize" (off by default).
// polly::createLoop consults it when annotating a loop latch: if the flag is
// set and vectorization of the loop was not disabled, the latch is annotated
// with vectorize-enable metadata set to true.
cl::opt<bool> PollyVectorizeMetadata(
"polly-annotate-metadata-vectorize",
cl::desc("Append vectorize enable/disable metadata from polly"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
"polly-scheduling",
cl::desc("Scheduling type of parallel OpenMP for loops"),
Expand Down Expand Up @@ -159,8 +164,19 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,

// Create the loop latch and annotate it as such.
BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
if (Annotator)
Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);

// Emit no vectorize metadata when both LoopVectDisabled and
// PollyVectorizeMetadata are false. Set the vectorize metadata to false
// when LoopVectDisabled is true. Otherwise (only PollyVectorizeMetadata is
// true) set the vectorize metadata to true.
if (Annotator) {
std::optional<bool> EnableVectorizeMetadata;
if (LoopVectDisabled)
EnableVectorizeMetadata = false;
else if (PollyVectorizeMetadata)
EnableVectorizeMetadata = true;
Annotator->annotateLoopLatch(B, Parallel, EnableVectorizeMetadata);
}

IV->addIncoming(IncrementedIV, HeaderBB);
if (GuardBB)
Expand Down
61 changes: 61 additions & 0 deletions polly/test/CodeGen/Metadata/basic_vec_annotate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s

; Basic verification that vectorize metadata is added when
; "-polly-annotate-metadata-vectorize" is passed.

; void add(int *A, int *B, int *C,int n) {
; for(int i=0; i<n; i++)
; C[i] += A[i] + B[i];
; }

; CHECK: for.body:
; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
; CHECK: polly.stmt.for.body:
; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]]
; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i32 0}
; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true}

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"

; Test input: IR for the C function `add` shown at the top of this file,
; i.e. C[i] += A[i] + B[i] for i in [0, n). The loop latch in %for.body
; already carries loop metadata (!0) before the passes in the RUN line run.
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 {
entry:
br label %entry.split

entry.split: ; preds = %entry
; Guard: skip the loop entirely when n <= 0.
%cmp10 = icmp sgt i32 %n, 0
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry.split
; Trip count widened to i64 to match the 64-bit induction variable.
%wide.trip.count = zext nneg i32 %n to i64
br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry.split
ret void

for.body: ; preds = %for.body.preheader, %for.body
; Loop body: load A[i], B[i] and C[i], then store their sum back to C[i].
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%1 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %1, %0
%arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
%2 = load i32, ptr %arrayidx4, align 4
%add5 = add nsw i32 %add, %2
store i32 %add5, ptr %arrayidx4, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
; Loop latch: the back edge carries the existing loop metadata !0.
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0
}

attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" }

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.mustprogress"}
Loading