Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions llvm/lib/Target/AIE/AIEBasePipelinerLoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include "AIEBasePipelinerLoopInfo.h"
#include "AIEBaseInstrInfo.h"
#include "AIELoopClass.h"
#include "AIESlotStatistics.h"
#include "Utils/AIELoopUtils.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
Expand Down Expand Up @@ -647,7 +649,9 @@ class ZeroOverheadLoop : public AIEBasePipelinerLoopInfo {
MachineInstr *DefTripCount;
MachineBasicBlock *LoopStartBlock;

// Decide whether the postpipeliner may do a better job
// Decide whether the postpipeliner may do a better job,
// either a priori or after scheduling.
bool preferPostPipeliner();
bool preferPostPipeliner(SMSchedule &SMS);

public:
Expand All @@ -665,6 +669,23 @@ class ZeroOverheadLoop : public AIEBasePipelinerLoopInfo {
bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override;
};

// Loop classes, as returned by classifyLoop(), for which
// ZeroOverheadLoop::preferPostPipeliner() returns true.
static const std::set<int> RejectedLoopClasses{1000};

/// A-priori check of whether this loop should be left to the postpipeliner.
///
/// Computes the slot statistics of LoopBlock, classifies the loop with
/// classifyLoop(), and looks the resulting class up in RejectedLoopClasses.
///
/// \returns true if the loop's class is listed in RejectedLoopClasses,
/// false otherwise.
bool ZeroOverheadLoop::preferPostPipeliner() {
  AIE::SlotStatistics Stats = AIE::computeSlotStatistics(*LoopBlock, &TII);
  const int LoopClass = classifyLoop(Stats);
  // The trailing newline terminates the debug line, so that whatever is
  // printed next (here or by the caller) starts on a line of its own.
  LLVM_DEBUG(dbgs() << "Stats="; Stats.dumpShort();
             dbgs() << format("\nLoopClass=%d\n", LoopClass));

  if (RejectedLoopClasses.count(LoopClass)) {
    LLVM_DEBUG(dbgs() << format("PLI: Leaving loopclass %d for PostPipeliner\n",
                                LoopClass));
    return true;
  }

  return false;
}

ZeroOverheadLoop::Assessment ZeroOverheadLoop::accept(MachineInstr *EndLoop) {
if (!MinTripCount) {
LLVM_DEBUG(dbgs() << "Unbounded loop detected!\n");
Expand Down Expand Up @@ -718,6 +739,10 @@ ZeroOverheadLoop::Assessment ZeroOverheadLoop::accept(MachineInstr *EndLoop) {
setMinTripCount(InitVal);
}

if (preferPostPipeliner()) {
return Assessment::PostPipelinerCandidate;
}

LLVM_DEBUG(dbgs() << "Loop accepted\n");
return Assessment::Accept;
}
Expand Down Expand Up @@ -853,7 +878,6 @@ createAIEBasePipelinerLoopInfo(MachineInstr *EndLoop,
const AIEBaseInstrInfo &TII) {
LLVM_DEBUG(dbgs() << "PLI: ----START LOOP----\n");
LLVM_DEBUG(dbgs() << " Trying DownCountLoop\n");

DownCountLoop DCL(EndLoop, TII);
auto Outcome = DCL.accept(EndLoop);
if (Outcome == AIEBasePipelinerLoopInfo::Assessment::Accept) {
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AIE/AIEBasePipelinerLoopInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand Down Expand Up @@ -74,7 +74,8 @@ class AIEBasePipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
UnboundedLoop,
UnsuitableInitVal,
InitStepMismatch,
TooLowMinTripCount
TooLowMinTripCount,
PostPipelinerCandidate
};

AIEBasePipelinerLoopInfo(MachineInstr *EndLoop, const AIEBaseInstrInfo &TII);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIELoopClass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ static const KernelFeatures Kernels[] = {
{45, {{900, 300, 0, 2520, 0, 0, 3360}, {45, 45, 0, 45, 45}}},
{46, {{0, 0, 0, 0, 2160, 0, 120, 1080}, {0, 420, 420}}},
{47, {{0, 0, 0, 0, 360, 0, 240, 360}, {0, 60, 60}}},
// These are pre-regalloc
{1000, {{0, 0, 0, 0, 480, 0, 480, 480}, {0, 120, 120}}},
{1001, {{0, 0, 0, 0, 360, 0, 480, 720}, {0, 60, 60}}},
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed to add this entry; without it, there was a false-positive match due to the small distance.


};

std::vector<int> getLoopClassScores(const SlotStatistics &Stats) {
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AIE/aie2/end-to-end/TanhTemplated-swp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,22 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x9, x10, x3; nopv
; CHECK-NEXT: vldb wl7, [p0], #32; vmov wh3, wl2
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh4, x7, x0, r1
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh3, bmh0, x3, x4, r1
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh4, bmh0, x3, x4, r1
; CHECK-NEXT: vband x9, x10, x5; vmul.f bmh2, x6, x9, r1
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
; CHECK-NEXT: vsub.f bml1, bmh4, bmh1, r0
; CHECK-NEXT: vmul.f bmh7, x6, x9, r1
; CHECK-NEXT: vmul.f bmh6, x0, x7, r1
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml2, bmh5, bmh1, r0
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh6, x7, x0, r1
; CHECK-NEXT: vsub.f bml0, bmh5, bmh1, r0
; CHECK-NEXT: vmul.f bmh3, x6, x9, r1
; CHECK-NEXT: vmul.f bmh7, x0, x7, r1
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml1, bmh6, bmh1, r0
; CHECK-NEXT: vconv.bf16.fp32 wl7, bmh2; vmul.f bmh8, x0, x7, r1
; CHECK-NEXT: vmac.f bml0, bmh0, x5, x4, r1
; CHECK-NEXT: vmsc.f bml3, bmh3, x7, x3, r1
; CHECK-NEXT: vmac.f bml2, bmh0, x5, x4, r1
; CHECK-NEXT: vmsc.f bml3, bmh4, x7, x3, r1
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh3
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh7
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh6
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmsc.f bml4, bml0, x3, x5, r1
; CHECK-NEXT: vst.conv.bf16.fp32 bml0, [p1], #32; vmsc.f bml4, bml2, x3, x5, r1
; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh8; vmin_ge.bf16 x9, r16, x3, x1
; CHECK-NEXT: vst.conv.bf16.fp32 bml2, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: nopb ; nopa ; vconv.bf16.fp32 wl7, bml3; nopx ; vmin_ge.bf16 x11, r16, x5, x1; nopv
; CHECK-NEXT: // %bb.2:
Expand Down