Skip to content
13 changes: 10 additions & 3 deletions polly/include/polly/CodeGen/IRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,16 @@ class ScopAnnotator {
/// Annotate the new instruction @p I for all parallel loops.
void annotate(llvm::Instruction *I);

/// Annotate the loop latch @p B wrt. @p L.
void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel,
bool IsLoopVectorizerDisabled) const;
/// Append an "llvm.loop.vectorize.enable" property node to the loop metadata
/// operand list @p Args.
///
/// @param Ctx                  The context in which the metadata is created.
/// @param Args                 The operand list the new property node is
///                             appended to.
/// @param EnableLoopVectorizer The boolean (i1) value stored in the property.
void addVectorizeMetadata(llvm::LLVMContext &Ctx,
llvm::SmallVector<llvm::Metadata *, 3> *Args,
bool EnableLoopVectorizer) const;

/// Annotate the loop latch @p B.
///
/// @param B                       The latch branch that receives the loop
///                                metadata.
/// @param IsParallel              If true, "llvm.loop.parallel_accesses"
///                                metadata is added for the loop.
/// @param EnableVectorizeMetadata If no value is passed, we don't annotate any
///                                vectorize metadata. Otherwise
///                                "llvm.loop.vectorize.enable" is added with
///                                the passed value.
void annotateLoopLatch(
llvm::BranchInst *B, bool IsParallel,
std::optional<bool> EnableVectorizeMetadata = std::nullopt) const;

/// Add alternative alias based pointers
///
Expand Down
29 changes: 29 additions & 0 deletions polly/lib/CodeGen/CodeGeneration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ static cl::opt<bool> Verify("polly-codegen-verify",
cl::desc("Verify the function generated by Polly"),
cl::Hidden, cl::cat(PollyCategory));

// Opt-in switch (off by default) that makes Polly attach
// "llvm.loop.vectorize.enable" loop metadata. This definition has external
// linkage: LoopGenerators.cpp refers to it through an 'extern' declaration.
cl::opt<bool> PollyVectorizeMetadata(
"polly-annotate-metadata-vectorize",
cl::desc("Append vectorize enable/disable metadata from polly"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

bool polly::PerfMonitoring;

static cl::opt<bool, true>
Expand Down Expand Up @@ -233,6 +238,30 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);

// The code below sets "llvm.loop.vectorize.enable" to false on the original
// loops, i.e. the code path taken when the run-time checks (RTCs) fail.
// When 'polly-annotate-metadata-vectorize' is passed, we don't want the
// Loop Vectorizer to come in later and vectorize the original fallback
// loop.
if (PollyVectorizeMetadata) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (PollyVectorizeMetadata) {

I'd be fine if the loop vectorizer were always disabled for fallback code. Would that mean too many test updates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am seeing around 19 test failures.
Should we handle them in a separate patch,
or can the fixes be part of this patch itself?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Meinersbur Please advise whether the test case changes for the 19 failures should go into a separate patch.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can add the test changes to this PR. If that's too much work, I'd accept the -polly-annotate-metadata-vectorize opt-in as well.

LLVMContext &Ctx = S.getFunction().getContext();
for (Loop *L : LI.getLoopsInPreorder()) {
if (!L || !S.contains(L))
continue;
MDNode *LoopID = L->getLoopID();
SmallVector<Metadata *, 3> Args;
if (LoopID)
for (unsigned i = 0, e = LoopID->getNumOperands(); i != e; ++i)
Args.push_back(LoopID->getOperand(i));
else
Args.push_back(nullptr);

Annotator.addVectorizeMetadata(Ctx, &Args, false);
MDNode *NewLoopID = MDNode::get(Ctx, Args);
NewLoopID->replaceOperandWith(0, NewLoopID);
L->setLoopID(NewLoopID);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
LLVMContext &Ctx = S.getFunction().getContext();
for (Loop *L : LI.getLoopsInPreorder()) {
if (!L || !S.contains(L))
continue;
MDNode *LoopID = L->getLoopID();
SmallVector<Metadata *, 3> Args;
if (LoopID)
for (unsigned i = 0, e = LoopID->getNumOperands(); i != e; ++i)
Args.push_back(LoopID->getOperand(i));
else
Args.push_back(nullptr);
Annotator.addVectorizeMetadata(Ctx, &Args, false);
MDNode *NewLoopID = MDNode::get(Ctx, Args);
NewLoopID->replaceOperandWith(0, NewLoopID);
L->setLoopID(NewLoopID);
}
#include "llvm/Transforms/Utils/LoopUtils.h"
LLVMContext &Ctx = S.getFunction().getContext();
for (Loop *L : LI.getLoopsInPreorder()) {
if (!S.contains(L))
continue;
addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A small question here:
Using the addStringMetadataToLoop API causes the value to be emitted as an i32,
for example:
!2 = !{!"llvm.loop.vectorize.enable", i32 0}
Although the behavior is the same, is this okay, given that the value should be an i1 according to LangRef.rst?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not make a difference:

}

if (PerfMonitoring) {
PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
Expand Down
41 changes: 26 additions & 15 deletions polly/lib/CodeGen/IRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,28 @@ void ScopAnnotator::popLoop(bool IsParallel) {
LoopAttrEnv.pop_back();
}

void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
bool IsLoopVectorizerDisabled) const {
void ScopAnnotator::addVectorizeMetadata(LLVMContext &Ctx,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void ScopAnnotator::addVectorizeMetadata(LLVMContext &Ctx,
static void addVectorizeMetadata(LLVMContext &Ctx,

AFAICS this does not use any ScopAnnotator members.

SmallVector<Metadata *, 3> *Args,
bool EnableLoopVectorizer) const {
MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
ConstantInt *Value =
ConstantInt::get(Type::getInt1Ty(Ctx), EnableLoopVectorizer);
ValueAsMetadata *PropValue = ValueAsMetadata::get(Value);
Args->push_back(MDNode::get(Ctx, {PropName, PropValue}));
}

void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
static void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,

llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
MDNode *AccGroup = ParallelLoops.back();
Args->push_back(MDNode::get(Ctx, {PropName, AccGroup}));
}

// The last argument is optional; if no value is passed, we don't annotate
// any vectorize metadata.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interface descriptions usually go to the declaration in the header files as Doxygen comment (/// @param EnableVectorizeMetadata If no value is passed, we don't annotate any vectorize metadata.)

void ScopAnnotator::annotateLoopLatch(
BranchInst *B, bool IsParallel,
std::optional<bool> EnableVectorizeMetadata) const {
LLVMContext &Ctx = SE->getContext();
SmallVector<Metadata *, 3> Args;

Expand All @@ -145,19 +165,10 @@ void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
if (MData)
llvm::append_range(Args, drop_begin(MData->operands(), 1));
}

if (IsLoopVectorizerDisabled) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
}

if (IsParallel) {
MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
MDNode *AccGroup = ParallelLoops.back();
Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
}
if (IsParallel)
addParallelMetadata(Ctx, &Args, ParallelLoops);
if (EnableVectorizeMetadata.has_value())
this->addVectorizeMetadata(Ctx, &Args, *EnableVectorizeMetadata);

// No metadata to annotate.
if (!MData && Args.size() <= 1)
Expand Down
19 changes: 17 additions & 2 deletions polly/lib/CodeGen/LoopGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "polly/CodeGen/LoopGenerators.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
Expand All @@ -35,6 +36,8 @@ static cl::opt<int, true>
cl::Hidden, cl::location(polly::PollyNumThreads),
cl::init(0), cl::cat(PollyCategory));

extern cl::opt<bool> PollyVectorizeMetadata;

static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
"polly-scheduling",
cl::desc("Scheduling type of parallel OpenMP for loops"),
Expand Down Expand Up @@ -159,8 +162,20 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,

// Create the loop latch and annotate it as such.
BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
if (Annotator)
Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);

// Don't annotate vectorize metadata when both LoopVectDisabled and
// PollyVectorizeMetadata are false. When LoopVectDisabled is true, annotate
// the vectorize metadata as false. Otherwise (only PollyVectorizeMetadata is
// true) annotate the vectorize metadata as true.
if (Annotator) {
if (!LoopVectDisabled && !PollyVectorizeMetadata)
Annotator->annotateLoopLatch(B, Parallel);
else
Annotator->annotateLoopLatch(
B, Parallel,
/*EnableVectorizeMetadata*/ !LoopVectDisabled &&
PollyVectorizeMetadata);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (!LoopVectDisabled && !PollyVectorizeMetadata)
Annotator->annotateLoopLatch(B, Parallel);
else
Annotator->annotateLoopLatch(
B, Parallel,
/*EnableVectorizeMetadata*/ !LoopVectDisabled &&
PollyVectorizeMetadata);
std::optional<bool> EnableVectorizeMetadata;
if (LoopVectDisabled)
EnableVectorizeMetadata = false;
else if (PollyVectorizeMetadata)
EnableVectorizeMetadata = true;
Annotator->annotateLoopLatch(B, Parallel, EnableVectorizeMetadata);

If I got the boolean logic correct

}

IV->addIncoming(IncrementedIV, HeaderBB);
if (GuardBB)
Expand Down
61 changes: 61 additions & 0 deletions polly/test/CodeGen/Metadata/basic_vec_annotate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s

; Basic verification that vectorize metadata is added when
; "-polly-annotate-metadata-vectorize" is passed.

; void add(int *A, int *B, int *C,int n) {
; for(int i=0; i<n; i++)
; C[i] += A[i] + B[i];
; }

; CHECK: for.body:
; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
; CHECK: polly.stmt.for.body:
; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]]
; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 false}
; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true}

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64-unknown-linux-gnu"

; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 {
entry:
br label %entry.split

entry.split: ; preds = %entry
%cmp10 = icmp sgt i32 %n, 0
br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry.split
%wide.trip.count = zext nneg i32 %n to i64
br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry.split
ret void

for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
%1 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %1, %0
%arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
%2 = load i32, ptr %arrayidx4, align 4
%add5 = add nsw i32 %add, %2
store i32 %add5, ptr %arrayidx4, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0
}

attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" }

!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.mustprogress"}
Loading