Skip to content

Commit 29976f2

Browse files
authored
[AMDGPU] Handle S_GETREG_B32 hazard on gfx1250 (#153848)
The GFX1250 SPG says that S_GETREG_B32 does not wait for idle before executing, so the user must issue S_WAIT_ALU 0 (emitted by this patch as S_WAITCNT_DEPCTR 0) before any S_GETREG_B32 that reads STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1 parent 3a4a60d commit 29976f2

File tree

4 files changed

+119
-0
lines changed

4 files changed

+119
-0
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,6 +1200,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12001200
fixShift64HighRegBug(MI);
12011201
fixVALUMaskWriteHazard(MI);
12021202
fixRequiredExportPriority(MI);
1203+
if (ST.requiresWaitIdleBeforeGetReg())
1204+
fixGetRegWaitIdle(MI);
12031205
}
12041206

12051207
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3428,3 +3430,24 @@ bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
34283430

34293431
return true;
34303432
}
3433+
3434+
/// Work around the S_GETREG_B32 hazard: if \p MI is an S_GETREG that reads
/// STATUS, STATE_PRIV, EXCP_FLAG_PRIV or EXCP_FLAG_USER, an
/// S_WAITCNT_DEPCTR 0 is built immediately in front of it.
///
/// \returns true if a wait instruction was inserted, false if \p MI was left
/// untouched (not an S_GETREG, or it reads some other hardware register).
bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) {
  if (!isSGetReg(MI->getOpcode()))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();

  // Only reads of these four hardware registers need the preceding wait.
  const auto HwRegId = getHWReg(TII, *MI);
  const bool NeedsWait = HwRegId == AMDGPU::Hwreg::ID_STATUS ||
                         HwRegId == AMDGPU::Hwreg::ID_STATE_PRIV ||
                         HwRegId == AMDGPU::Hwreg::ID_EXCP_FLAG_PRIV ||
                         HwRegId == AMDGPU::Hwreg::ID_EXCP_FLAG_USER;
  if (!NeedsWait)
    return false;

  // Passing MI as the insertion point places the wait directly before it.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0);
  return true;
}

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
110110
bool fixShift64HighRegBug(MachineInstr *MI);
111111
bool fixVALUMaskWriteHazard(MachineInstr *MI);
112112
bool fixRequiredExportPriority(MachineInstr *MI);
113+
// Inserts S_WAITCNT_DEPCTR 0 before an S_GETREG_B32 of STATUS, STATE_PRIV,
// EXCP_FLAG_PRIV or EXCP_FLAG_USER; returns true if a wait was inserted.
bool fixGetRegWaitIdle(MachineInstr *MI);
113114

114115
int checkMAIHazards(MachineInstr *MI);
115116
int checkMAIHazards908(MachineInstr *MI);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,6 +1801,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
18011801
// instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
18021802
bool requiresNopBeforeDeallocVGPRs() const { return !GFX1250Insts; }
18031803

1804+
// \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
1805+
// STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
1806+
// Gated on GFX1250Insts: per the GFX1250 SPG, S_GETREG_B32 does not wait for
// idle before executing.
bool requiresWaitIdleBeforeGetReg() const { return GFX1250Insts; }
1807+
18041808
bool isDynamicVGPREnabled() const { return DynamicVGPR; }
18051809
unsigned getDynamicVGPRBlockSize() const {
18061810
return DynamicVGPRBlockSize32 ? 32 : 16;
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec -o - %s | FileCheck --check-prefix=GCN %s
3+
4+
---
5+
name: s_getreg_mode
6+
body: |
7+
bb.0:
8+
; GCN-LABEL: name: s_getreg_mode
9+
; GCN: $sgpr0 = S_GETREG_B32 1, implicit $mode
10+
$sgpr0 = S_GETREG_B32 1, implicit $mode
11+
...
12+
13+
---
14+
name: s_getreg_status
15+
body: |
16+
bb.0:
17+
; GCN-LABEL: name: s_getreg_status
18+
; GCN: S_WAITCNT_DEPCTR 0
19+
; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode
20+
$sgpr0 = S_GETREG_B32 2, implicit $mode
21+
...
22+
23+
---
24+
name: s_getreg_status_masked
25+
body: |
26+
bb.0:
27+
; GCN-LABEL: name: s_getreg_status_masked
28+
; GCN: S_WAITCNT_DEPCTR 0
29+
; GCN-NEXT: $sgpr0 = S_GETREG_B32 66, implicit $mode
30+
$sgpr0 = S_GETREG_B32 66, implicit $mode
31+
...
32+
33+
---
34+
name: s_getreg_state_priv
35+
body: |
36+
bb.0:
37+
; GCN-LABEL: name: s_getreg_state_priv
38+
; GCN: S_WAITCNT_DEPCTR 0
39+
; GCN-NEXT: $sgpr0 = S_GETREG_B32 4, implicit $mode
40+
$sgpr0 = S_GETREG_B32 4, implicit $mode
41+
...
42+
43+
---
44+
name: s_getreg_excp_flag_priv
45+
body: |
46+
bb.0:
47+
; GCN-LABEL: name: s_getreg_excp_flag_priv
48+
; GCN: S_WAITCNT_DEPCTR 0
49+
; GCN-NEXT: $sgpr0 = S_GETREG_B32 17, implicit $mode
50+
$sgpr0 = S_GETREG_B32 17, implicit $mode
51+
...
52+
53+
---
54+
name: s_getreg_excp_flag_user
55+
body: |
56+
bb.0:
57+
; GCN-LABEL: name: s_getreg_excp_flag_user
58+
; GCN: S_WAITCNT_DEPCTR 0
59+
; GCN-NEXT: $sgpr0 = S_GETREG_B32 18, implicit $mode
60+
$sgpr0 = S_GETREG_B32 18, implicit $mode
61+
...
62+
63+
---
64+
name: s_getreg_status_in_bundle
65+
body: |
66+
bb.0:
67+
; GCN-LABEL: name: s_getreg_status_in_bundle
68+
; GCN: BUNDLE {
69+
; GCN-NEXT: S_NOP 0
70+
; GCN-NEXT: S_WAITCNT_DEPCTR 0
71+
; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode
72+
; GCN-NEXT: }
73+
BUNDLE {
74+
S_NOP 0
75+
$sgpr0 = S_GETREG_B32 2, implicit $mode
76+
}
77+
...
78+
79+
---
80+
name: s_getreg_status_top_of_bundle
81+
body: |
82+
bb.0:
83+
; GCN-LABEL: name: s_getreg_status_top_of_bundle
84+
; GCN: BUNDLE {
85+
; GCN-NEXT: S_WAITCNT_DEPCTR 0
86+
; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode
87+
; GCN-NEXT: }
88+
BUNDLE {
89+
$sgpr0 = S_GETREG_B32 2, implicit $mode
90+
}
91+
...

0 commit comments

Comments
 (0)