Skip to content

Commit 4d2fa8a

Browse files
author
Martien de Jong
committed
[AIE] Add a loopclass to pinpoint a postpipeliner candidate
1 parent 8b335be commit 4d2fa8a

File tree

3 files changed, with 31 additions and 13 deletions.

llvm/lib/Target/AIE/AIEBasePipelinerLoopInfo.cpp

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414

1515
#include "AIEBasePipelinerLoopInfo.h"
1616
#include "AIEBaseInstrInfo.h"
17+
#include "AIELoopClass.h"
18+
#include "AIESlotStatistics.h"
1719
#include "Utils/AIELoopUtils.h"
1820
#include "llvm/ADT/SmallVector.h"
1921
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -751,6 +753,19 @@ bool ZeroOverheadLoop::preferPostPipeliner(SMSchedule &SMS) {
751753
return false;
752754
}
753755

756+
AIE::SlotStatistics Stats = AIE::computeSlotStatistics(*LoopBlock, &TII);
757+
int LoopClass = classifyLoop(Stats);
758+
LLVM_DEBUG(dbgs() << "Stats="; Stats.dumpShort();
759+
dbgs() << format("\nLoopClass=%d", LoopClass));
760+
switch (LoopClass) {
761+
case 1000:
762+
LLVM_DEBUG(dbgs() << format("PLI: Leaving loopclass %d for PostPipeliner\n",
763+
LoopClass));
764+
return true;
765+
default:
766+
break;
767+
}
768+
754769
unsigned NS = SMS.getMaxStageCount() + 1;
755770
int II = SMS.getInitiationInterval();
756771
if (NS > LoopMaxStageCount && II < PostPipelinerCutoff) {
@@ -853,7 +868,6 @@ createAIEBasePipelinerLoopInfo(MachineInstr *EndLoop,
853868
const AIEBaseInstrInfo &TII) {
854869
LLVM_DEBUG(dbgs() << "PLI: ----START LOOP----\n");
855870
LLVM_DEBUG(dbgs() << " Trying DownCountLoop\n");
856-
857871
DownCountLoop DCL(EndLoop, TII);
858872
auto Outcome = DCL.accept(EndLoop);
859873
if (Outcome == AIEBasePipelinerLoopInfo::Assessment::Accept) {

llvm/lib/Target/AIE/AIELoopClass.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,10 @@ static const KernelFeatures Kernels[] = {
7878
{45, {{900, 300, 0, 2520, 0, 0, 3360}, {45, 45, 0, 45, 45}}},
7979
{46, {{0, 0, 0, 0, 2160, 0, 120, 1080}, {0, 420, 420}}},
8080
{47, {{0, 0, 0, 0, 360, 0, 240, 360}, {0, 60, 60}}},
81+
// These are pre-regalloc
82+
{1000, {{0, 0, 0, 0, 480, 0, 480, 480}, {0, 120, 120}}},
83+
{1001, {{0, 0, 0, 0, 360, 0, 480, 720}, {0, 60, 60}}},
84+
8185
};
8286

8387
std::vector<int> getLoopClassScores(const SlotStatistics &Stats) {

llvm/test/CodeGen/AIE/aie2/end-to-end/TanhTemplated-swp.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -111,22 +111,22 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
111111
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
112112
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x9, x10, x3; nopv
113113
; CHECK-NEXT: vldb wl7, [p0], #32; vmov wh3, wl2
114-
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh4, x7, x0, r1
115-
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh3, bmh0, x3, x4, r1
114+
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
115+
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh4, bmh0, x3, x4, r1
116116
; CHECK-NEXT: vband x9, x10, x5; vmul.f bmh2, x6, x9, r1
117-
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
118-
; CHECK-NEXT: vsub.f bml1, bmh4, bmh1, r0
119-
; CHECK-NEXT: vmul.f bmh7, x6, x9, r1
120-
; CHECK-NEXT: vmul.f bmh6, x0, x7, r1
121-
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml2, bmh5, bmh1, r0
117+
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh6, x7, x0, r1
118+
; CHECK-NEXT: vsub.f bml0, bmh5, bmh1, r0
119+
; CHECK-NEXT: vmul.f bmh3, x6, x9, r1
120+
; CHECK-NEXT: vmul.f bmh7, x0, x7, r1
121+
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml1, bmh6, bmh1, r0
122122
; CHECK-NEXT: vconv.bf16.fp32 wl7, bmh2; vmul.f bmh8, x0, x7, r1
123-
; CHECK-NEXT: vmac.f bml0, bmh0, x5, x4, r1
124-
; CHECK-NEXT: vmsc.f bml3, bmh3, x7, x3, r1
123+
; CHECK-NEXT: vmac.f bml2, bmh0, x5, x4, r1
124+
; CHECK-NEXT: vmsc.f bml3, bmh4, x7, x3, r1
125+
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh3
125126
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh7
126-
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh6
127-
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmsc.f bml4, bml0, x3, x5, r1
127+
; CHECK-NEXT: vst.conv.bf16.fp32 bml0, [p1], #32; vmsc.f bml4, bml2, x3, x5, r1
128128
; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh8; vmin_ge.bf16 x9, r16, x3, x1
129-
; CHECK-NEXT: vst.conv.bf16.fp32 bml2, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
129+
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
130130
; CHECK-NEXT: .L_LEnd0:
131131
; CHECK-NEXT: nopb ; nopa ; vconv.bf16.fp32 wl7, bml3; nopx ; vmin_ge.bf16 x11, r16, x5, x1; nopv
132132
; CHECK-NEXT: // %bb.2:

0 commit comments

Comments
 (0)