Skip to content

Commit e73d0c8

Browse files
Martien de Jong (martien-de-jong)
authored and committed
[AIE][WAWRewriter] Auto select latency/swpaware and swpaware bias
1 parent 231f25f commit e73d0c8

File tree

2 files changed

+20
-25
lines changed

2 files changed

+20
-25
lines changed

llvm/lib/Target/AIE/AIEWawRegRewriter.cpp

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -116,27 +116,24 @@ RewriteMode selectMode(RewriteMode Mode, int LoopClass) {
116116
}
117117
}
118118

119-
bool runSWPAware(RewriteMode Mode, int LoopClass, int &Bias) {
120-
Bias = MinIIBias;
119+
std::optional<int> getMinIIBias(RewriteMode Mode, int LoopClass) {
121120
switch (Mode) {
122121
case RewriteMode::SWPAwareAutoBias:
123122
break;
124123
case RewriteMode::SWPAware:
125-
return true;
124+
return MinIIBias;
126125
default:
127-
return false;
126+
return {};
128127
}
129128

130129
switch (LoopClass) {
131130
case 14:
132-
Bias = -1;
133-
return true;
131+
return -1;
134132
case 18:
135133
case 29:
136-
Bias = 1;
137-
return true;
134+
return 1;
138135
default:
139-
return true;
136+
return MinIIBias;
140137
}
141138
}
142139

@@ -598,9 +595,8 @@ bool AIEWawRegRewriter::renameMBBPhysRegs(const MachineBasicBlock *MBB) {
598595
}
599596
}
600597

601-
int Bias = MinIIBias;
602-
if (runSWPAware(Mode, LoopClass, Bias)) {
603-
sortSWPAware(Candidates, NonConstMBB, Statistics, Bias);
598+
if (auto Bias = getMinIIBias(Mode, LoopClass)) {
599+
sortSWPAware(Candidates, NonConstMBB, Statistics, *Bias);
604600
}
605601

606602
// Least-Recently-Used list of physical registers for assignments to VRegs.

llvm/test/CodeGen/AIE/aie2/end-to-end/TanhTemplated-swp.ll

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
; (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
88

99
; RUN: llc -O2 -mtriple=aie2 \
10-
; RUN: --aie-reg-rewrite-mode=latencyaware \
1110
; RUN: %s -o - | FileCheck %s
1211

1312
; The test is meant as a quick way to spot QoR regressions. In this test, the
@@ -112,22 +111,22 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
112111
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
113112
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vband x9, x10, x3; nopv
114113
; CHECK-NEXT: vldb wl7, [p0], #32; vmov wh3, wl2
115-
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
116-
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh4, bmh0, x3, x4, r1
114+
; CHECK-NEXT: nopx ; vmov wh9, wl2; vmul.f bmh4, x7, x0, r1
115+
; CHECK-NEXT: vconv.bf16.fp32 wl7, bml4; vldb wl7, [p0], #32; vmax_lt.bf16 x5, r16, x11, x8; vmac.f bmh3, bmh0, x3, x4, r1
117116
; CHECK-NEXT: vband x9, x10, x5; vmul.f bmh2, x6, x9, r1
118-
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh6, x7, x0, r1
119-
; CHECK-NEXT: vsub.f bml0, bmh5, bmh1, r0
120-
; CHECK-NEXT: vmul.f bmh3, x6, x9, r1
121-
; CHECK-NEXT: vmul.f bmh7, x0, x7, r1
122-
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml1, bmh6, bmh1, r0
117+
; CHECK-NEXT: vmov wh9, wl2; vmul.f bmh5, x7, x0, r1
118+
; CHECK-NEXT: vsub.f bml1, bmh4, bmh1, r0
119+
; CHECK-NEXT: vmul.f bmh7, x6, x9, r1
120+
; CHECK-NEXT: vmul.f bmh6, x0, x7, r1
121+
; CHECK-NEXT: vmov wh5, wl2; vsub.f bml2, bmh5, bmh1, r0
123122
; CHECK-NEXT: vconv.bf16.fp32 wl7, bmh2; vmul.f bmh8, x0, x7, r1
124-
; CHECK-NEXT: vmac.f bml2, bmh0, x5, x4, r1
125-
; CHECK-NEXT: vmsc.f bml3, bmh4, x7, x3, r1
126-
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh3
123+
; CHECK-NEXT: vmac.f bml0, bmh0, x5, x4, r1
124+
; CHECK-NEXT: vmsc.f bml3, bmh3, x7, x3, r1
127125
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh7
128-
; CHECK-NEXT: vst.conv.bf16.fp32 bml0, [p1], #32; vmsc.f bml4, bml2, x3, x5, r1
126+
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh6
127+
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmsc.f bml4, bml0, x3, x5, r1
129128
; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh8; vmin_ge.bf16 x9, r16, x3, x1
130-
; CHECK-NEXT: vst.conv.bf16.fp32 bml1, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
129+
; CHECK-NEXT: vst.conv.bf16.fp32 bml2, [p1], #32; vmax_lt.bf16 x3, r16, x9, x8
131130
; CHECK-NEXT: .L_LEnd0:
132131
; CHECK-NEXT: nopb ; nopa ; vconv.bf16.fp32 wl7, bml3; nopx ; vmin_ge.bf16 x11, r16, x5, x1; nopv
133132
; CHECK-NEXT: // %bb.2:

0 commit comments

Comments
 (0)