Skip to content

Commit 64c887f

Browse files
committed
[AMDGPU] Support bottom-up postRA scheduling.
1 parent de528d6 commit 64c887f

File tree

3 files changed

+95
-2
lines changed

3 files changed

+95
-2
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,25 @@ void GCNHazardRecognizer::processBundle() {
284284
CurrCycleInstr = nullptr;
285285
}
286286

287+
// Bottom-up counterpart of processBundle(): walks the instructions bundled
// after CurrCycleInstr and records each of them in EmittedInstrs.
//
// Expects CurrCycleInstr to be non-null on entry (it is dereferenced
// unconditionally below); RecedeCycle() only calls this after checking that
// CurrCycleInstr is set and isBundle() is true. CurrCycleInstr is reset to
// nullptr before returning.
void GCNHazardRecognizer::reverseProcessBundle() {
// Start at the instruction immediately following CurrCycleInstr.
288+
MachineBasicBlock::instr_iterator MI =
289+
std::next(CurrCycleInstr->getIterator());
// Hard stop: end of the parent block's instruction list.
290+
MachineBasicBlock::instr_iterator E =
291+
CurrCycleInstr->getParent()->instr_end();
292+
// Visit instructions for as long as they report isInsideBundle().
293+
for (; MI != E && MI->isInsideBundle(); ++MI) {
294+
CurrCycleInstr = &*MI;
// Drop up to MaxLookAhead - 1 entries from the back of EmittedInstrs,
// stopping early once the container is empty.
// NOTE(review): this loop's bound `E` shadows the iterator `E` declared
// above; the outer loop condition is unaffected, but a distinct name would
// be clearer.
295+
for (unsigned I = 0, E = MaxLookAhead - 1; I < E; ++I) {
296+
if (!EmittedInstrs.empty())
297+
EmittedInstrs.pop_back();
298+
}
299+
// Record the bundled instruction at the back, then force the container to
// exactly MaxLookAhead entries.
// NOTE(review): presumably resize() pads with null entries when the
// container is shorter than MaxLookAhead -- confirm against the declared
// type of EmittedInstrs.
300+
EmittedInstrs.push_back(CurrCycleInstr);
301+
EmittedInstrs.resize(MaxLookAhead);
302+
}
// The bundle has been consumed; no instruction is pending for this cycle.
303+
CurrCycleInstr = nullptr;
304+
}
305+
287306
void GCNHazardRecognizer::runOnInstruction(MachineInstr *MI) {
288307
assert(IsHazardRecognizerMode);
289308

@@ -423,7 +442,32 @@ void GCNHazardRecognizer::AdvanceCycle() {
423442
}
424443

425444
// Bottom-up cycle step for the hazard recognizer.
//
// In this diff rendering the `426-` line is the pre-commit body (an
// unconditional llvm_unreachable) that the commit removes; the `+` lines are
// the new implementation, which behaves as follows:
//   * no pending instruction: drop one entry from the back of EmittedInstrs
//     (if any) and return;
//   * pending instruction is a bundle: delegate to reverseProcessBundle();
//   * pending instruction has zero wait states: clear it without touching
//     EmittedInstrs;
//   * otherwise: update EmittedInstrs as described inline, then clear the
//     pending instruction.
void GCNHazardRecognizer::RecedeCycle() {
426-
llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
// Nothing was issued this cycle: only pop one entry from the back.
445+
if (!CurrCycleInstr) {
446+
if (!EmittedInstrs.empty())
447+
EmittedInstrs.pop_back();
448+
return;
449+
}
450+
// Bundles are handled instruction by instruction in reverseProcessBundle(),
// which also resets CurrCycleInstr.
451+
if (CurrCycleInstr->isBundle()) {
452+
reverseProcessBundle();
453+
return;
454+
}
455+
// Instructions that report zero wait states are not recorded at all.
456+
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
457+
if (!NumWaitStates) {
458+
CurrCycleInstr = nullptr;
459+
return;
460+
}
461+
// Record the instruction at the back, then pop
// min(NumWaitStates, getMaxLookAhead()) - 1 entries from the back (the loop
// index starts at 1).
// NOTE(review): assuming standard sequence-container semantics, the first
// pop_back() removes the element that was just pushed, so the instruction
// stays recorded only when the loop runs zero times -- confirm this is the
// intended bottom-up bookkeeping.
462+
EmittedInstrs.push_back(CurrCycleInstr);
463+
for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); i < e;
464+
++i) {
465+
if (!EmittedInstrs.empty())
466+
EmittedInstrs.pop_back();
467+
}
468+
// Force the container to exactly getMaxLookAhead() entries and clear the
// pending instruction.
469+
EmittedInstrs.resize(getMaxLookAhead());
470+
CurrCycleInstr = nullptr;
427471
}
428472

429473
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
6969
// Advance over a MachineInstr bundle. Look for hazards in the bundled
7070
// instructions.
7171
void processBundle();
72+
void reverseProcessBundle();
7273

7374
// Run on an individual instruction in hazard recognizer mode. This can be
7475
// used on a newly inserted instruction before returning from PreEmitNoops.

llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -verify-misched -o - %s | FileCheck -check-prefix=CHECK %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -misched-cluster=false -run-pass=postmisched -misched-postra-direction=bottomup -verify-misched -o - %s | FileCheck -check-prefix=CHECK-BOTTOMUP %s
34

45
--- |
56
define amdgpu_kernel void @no_sched_barrier(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { ret void }
@@ -29,6 +30,21 @@ body: |
2930
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
3031
; CHECK-NEXT: }
3132
; CHECK-NEXT: S_ENDPGM 0
33+
;
34+
; CHECK-BOTTOMUP-LABEL: name: no_sched_barrier
35+
; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
36+
; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
37+
; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
38+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
39+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
40+
; CHECK-BOTTOMUP-NEXT: }
41+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
42+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
43+
; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
44+
; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
45+
; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
46+
; CHECK-BOTTOMUP-NEXT: }
47+
; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
3248
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
3349
renamable $vgpr0 = IMPLICIT_DEF
3450
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -66,6 +82,22 @@ body: |
6682
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
6783
; CHECK-NEXT: }
6884
; CHECK-NEXT: S_ENDPGM 0
85+
;
86+
; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_0
87+
; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
88+
; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
89+
; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
90+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
91+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
92+
; CHECK-BOTTOMUP-NEXT: }
93+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
94+
; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 0
95+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
96+
; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
97+
; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
98+
; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
99+
; CHECK-BOTTOMUP-NEXT: }
100+
; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
69101
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
70102
renamable $vgpr0 = IMPLICIT_DEF
71103
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
@@ -105,6 +137,22 @@ body: |
105137
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
106138
; CHECK-NEXT: }
107139
; CHECK-NEXT: S_ENDPGM 0
140+
;
141+
; CHECK-BOTTOMUP-LABEL: name: sched_barrier_mask_1
142+
; CHECK-BOTTOMUP: renamable $vgpr0 = IMPLICIT_DEF
143+
; CHECK-BOTTOMUP-NEXT: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
144+
; CHECK-BOTTOMUP-NEXT: BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {
145+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
146+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, renamable $vgpr0, 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
147+
; CHECK-BOTTOMUP-NEXT: }
148+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr1 = nsw V_MUL_LO_U32_e64 killed $vgpr1, $vgpr1, implicit $exec
149+
; CHECK-BOTTOMUP-NEXT: renamable $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr2, $vgpr2, implicit $exec
150+
; CHECK-BOTTOMUP-NEXT: SCHED_BARRIER 1
151+
; CHECK-BOTTOMUP-NEXT: BUNDLE implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $sgpr0_sgpr1, implicit $exec, implicit killed $vgpr2 {
152+
; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr0, killed renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
153+
; CHECK-BOTTOMUP-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1, 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
154+
; CHECK-BOTTOMUP-NEXT: }
155+
; CHECK-BOTTOMUP-NEXT: S_ENDPGM 0
108156
renamable $sgpr0_sgpr1 = IMPLICIT_DEF
109157
renamable $vgpr0 = IMPLICIT_DEF
110158
BUNDLE implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $sgpr0_sgpr1, implicit $vgpr0, implicit $exec {

0 commit comments

Comments
 (0)