Skip to content

Commit 2942c74

Browse files
author
Martien de Jong
committed
[AIE] Add a loopclass to pinpoint a postpipeliner candidate
1 parent 2733f2e commit 2942c74

File tree

4 files changed

+45
-16
lines changed

4 files changed

+45
-16
lines changed

llvm/lib/Target/AIE/AIEBasePipelinerLoopInfo.cpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
#include "AIEBasePipelinerLoopInfo.h"
1616
#include "AIEBaseInstrInfo.h"
17+
#include "AIELoopClass.h"
18+
#include "AIESlotStatistics.h"
1719
#include "Utils/AIELoopUtils.h"
1820
#include "llvm/ADT/SmallVector.h"
1921
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -647,7 +649,9 @@ class ZeroOverheadLoop : public AIEBasePipelinerLoopInfo {
647649
MachineInstr *DefTripCount;
648650
MachineBasicBlock *LoopStartBlock;
649651

650-
// Decide whether the postpipeliner may do a better job
652+
// Decide whether the postpipeliner may do a better job,
653+
// either a priori or after scheduling.
654+
bool preferPostPipeliner();
651655
bool preferPostPipeliner(SMSchedule &SMS);
652656

653657
public:
@@ -665,6 +669,23 @@ class ZeroOverheadLoop : public AIEBasePipelinerLoopInfo {
665669
bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override;
666670
};
667671

672+
// Loop classes for which ZeroOverheadLoop::preferPostPipeliner() returns true,
// i.e. loops that are left to the postpipeliner.
// NOTE(review): 1000 matches the id of one of the "pre-regalloc" entries in
// the Kernels[] table of AIELoopClass.cpp; presumably classifyLoop() returns
// those ids -- confirm.
static const std::set<int> RejectedLoopClasses{1000};
673+
674+
// A-priori variant of the postpipeliner preference check (takes no schedule).
// Computes slot statistics for *LoopBlock, maps them to a loop class with
// classifyLoop(), and returns true iff that class is listed in
// RejectedLoopClasses; otherwise returns false.
// accept() maps a true result to Assessment::PostPipelinerCandidate.
bool ZeroOverheadLoop::preferPostPipeliner() {
675+
AIE::SlotStatistics Stats = AIE::computeSlotStatistics(*LoopBlock, &TII);
676+
int LoopClass = classifyLoop(Stats);
677+
// Debug-only trace of the statistics and the resulting class.
// NOTE(review): the format string starts with '\n' but has no trailing
// newline, so the next debug message is appended to the "LoopClass=" line --
// confirm this is intended.
LLVM_DEBUG(dbgs() << "Stats="; Stats.dumpShort();
678+
dbgs() << format("\nLoopClass=%d", LoopClass));
679+
680+
// A listed class means this pipeliner steps aside for the postpipeliner.
if (RejectedLoopClasses.count(LoopClass)) {
681+
LLVM_DEBUG(dbgs() << format("PLI: Leaving loopclass %d for PostPipeliner\n",
682+
LoopClass));
683+
return true;
684+
}
685+
686+
return false;
687+
}
688+
668689
ZeroOverheadLoop::Assessment ZeroOverheadLoop::accept(MachineInstr *EndLoop) {
669690
if (!MinTripCount) {
670691
LLVM_DEBUG(dbgs() << "Unbounded loop detected!\n");
@@ -718,6 +739,10 @@ ZeroOverheadLoop::Assessment ZeroOverheadLoop::accept(MachineInstr *EndLoop) {
718739
setMinTripCount(InitVal);
719740
}
720741

742+
if (preferPostPipeliner()) {
743+
return Assessment::PostPipelinerCandidate;
744+
}
745+
721746
LLVM_DEBUG(dbgs() << "Loop accepted\n");
722747
return Assessment::Accept;
723748
}
@@ -853,7 +878,6 @@ createAIEBasePipelinerLoopInfo(MachineInstr *EndLoop,
853878
const AIEBaseInstrInfo &TII) {
854879
LLVM_DEBUG(dbgs() << "PLI: ----START LOOP----\n");
855880
LLVM_DEBUG(dbgs() << " Trying DownCountLoop\n");
856-
857881
DownCountLoop DCL(EndLoop, TII);
858882
auto Outcome = DCL.accept(EndLoop);
859883
if (Outcome == AIEBasePipelinerLoopInfo::Assessment::Accept) {

llvm/lib/Target/AIE/AIEBasePipelinerLoopInfo.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
7-
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
7+
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
88
//
99
//===----------------------------------------------------------------------===//
1010
//
@@ -74,7 +74,8 @@ class AIEBasePipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
7474
UnboundedLoop,
7575
UnsuitableInitVal,
7676
InitStepMismatch,
77-
TooLowMinTripCount
77+
TooLowMinTripCount,
78+
PostPipelinerCandidate
7879
};
7980

8081
AIEBasePipelinerLoopInfo(MachineInstr *EndLoop, const AIEBaseInstrInfo &TII);

llvm/lib/Target/AIE/AIELoopClass.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ static const KernelFeatures Kernels[] = {
7878
{45, {{900, 300, 0, 2520, 0, 0, 3360}, {45, 45, 0, 45, 45}}},
7979
{46, {{0, 0, 0, 0, 2160, 0, 120, 1080}, {0, 420, 420}}},
8080
{47, {{0, 0, 0, 0, 360, 0, 240, 360}, {0, 60, 60}}},
81+
// These are pre-regalloc
82+
{1000, {{0, 0, 0, 0, 480, 0, 480, 480}, {0, 120, 120}}},
83+
{1001, {{0, 0, 0, 0, 360, 0, 480, 720}, {0, 60, 60}}},
84+
8185
};
8286

8387
std::vector<int> getLoopClassScores(const SlotStatistics &Stats) {

llvm/test/CodeGen/AIE/aie2/end-to-end/TanhTemplated-swp.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,22 +111,22 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
111111
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
112112
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x9, x10, x3; nopv
113113
; CHECK-NEXT: vldb wl7, [p0], #32; vmov wh3, wl2
114-
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh4, x7, x0, r1
115-
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh3, bmh0, x3, x4, r1
114+
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
115+
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh4, bmh0, x3, x4, r1
116116
; CHECK-NEXT: vband x9, x10, x5; vmul.f bmh2, x6, x9, r1
117-
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
118-
; CHECK-NEXT: vsub.f bml1, bmh4, bmh1, r0
119-
; CHECK-NEXT: vmul.f bmh7, x6, x9, r1
120-
; CHECK-NEXT: vmul.f bmh6, x0, x7, r1
121-
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml2, bmh5, bmh1, r0
117+
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh6, x7, x0, r1
118+
; CHECK-NEXT: vsub.f bml0, bmh5, bmh1, r0
119+
; CHECK-NEXT: vmul.f bmh3, x6, x9, r1
120+
; CHECK-NEXT: vmul.f bmh7, x0, x7, r1
121+
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml1, bmh6, bmh1, r0
122122
; CHECK-NEXT: vconv.bf16.fp32 wl7, bmh2; vmul.f bmh8, x0, x7, r1
123-
; CHECK-NEXT: vmac.f bml0, bmh0, x5, x4, r1
124-
; CHECK-NEXT: vmsc.f bml3, bmh3, x7, x3, r1
123+
; CHECK-NEXT: vmac.f bml2, bmh0, x5, x4, r1
124+
; CHECK-NEXT: vmsc.f bml3, bmh4, x7, x3, r1
125+
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh3
125126
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh7
126-
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh6
127-
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmsc.f bml4, bml0, x3, x5, r1
127+
; CHECK-NEXT: vst.conv.bf16.fp32 bml0, [p1], #32; vmsc.f bml4, bml2, x3, x5, r1
128128
; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh8; vmin_ge.bf16 x9, r16, x3, x1
129-
; CHECK-NEXT: vst.conv.bf16.fp32 bml2, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
129+
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
130130
; CHECK-NEXT: .L_LEnd0:
131131
; CHECK-NEXT: nopb ; nopa ; vconv.bf16.fp32 wl7, bml3; nopx ; vmin_ge.bf16 x11, r16, x5, x1; nopv
132132
; CHECK-NEXT: // %bb.2:

0 commit comments

Comments
 (0)