Skip to content

Commit 0956b58

Browse files
rampitec, kcloudy0717
authored and committed
[AMDGPU] Prevent folding of flat_scr_base_hi into a 64-bit SALU (llvm#170373)
Fixes: SWDEV-563886
1 parent 2e11044 commit 0956b58

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1868,6 +1868,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
18681868
return GFX1250Insts && getGeneration() == GFX12;
18691869
}
18701870

1871+
// src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit
1872+
// result.
// \returns true when this hazard applies to the subtarget, i.e. when
// GFX1250Insts is set and getGeneration() == GFX12.
// NOTE(review): the operand-legality check added in SIInstrInfo.cpp by this
// same change tests AMDGPU::SRC_FLAT_SCRATCH_BASE_HI and also rejects
// S_BITCMP0_B64 / S_BITCMP1_B64; presumably "src_flat_scratch_hi" above means
// that same register -- confirm and align the wording.
1873+
bool hasFlatScratchHiInB64InstHazard() const {
1874+
return GFX1250Insts && getGeneration() == GFX12;
1875+
}
1876+
18711877
/// \returns true if the subtarget supports clusters of workgroups.
18721878
bool hasClusters() const { return HasClusters; }
18731879

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6256,6 +6256,17 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
62566256
(int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
62576257
RI.isSGPRReg(MRI, MO.getReg()))
62586258
return false;
6259+
6260+
if (ST.hasFlatScratchHiInB64InstHazard() &&
6261+
MO.getReg() == AMDGPU::SRC_FLAT_SCRATCH_BASE_HI && isSALU(MI)) {
6262+
if (const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::sdst)) {
6263+
if (AMDGPU::getRegBitWidth(*MRI.getRegClass(Dst->getReg())) == 64)
6264+
return false;
6265+
}
6266+
if (Opc == AMDGPU::S_BITCMP0_B64 || Opc == AMDGPU::S_BITCMP1_B64)
6267+
return false;
6268+
}
6269+
62596270
return true;
62606271
}
62616272

Lines changed: 145 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,145 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
name: s_ashr_i64
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
10+
; GCN-LABEL: name: s_ashr_i64
11+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
12+
; GCN-NEXT: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 undef %2:sreg_64, [[COPY]], implicit-def $scc
13+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
14+
%2:sreg_64 = S_ASHR_I64 undef %1:sreg_64, %0, implicit-def $scc
15+
...
16+
17+
---
18+
name: s_lshl_b64
19+
tracksRegLiveness: true
20+
body: |
21+
bb.0:
22+
23+
; GCN-LABEL: name: s_lshl_b64
24+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
25+
; GCN-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 undef %2:sreg_64, [[COPY]], implicit-def $scc
26+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
27+
%2:sreg_64 = S_LSHL_B64 undef %1:sreg_64, %0, implicit-def $scc
28+
...
29+
30+
---
31+
name: s_lshr_b64
32+
tracksRegLiveness: true
33+
body: |
34+
bb.0:
35+
36+
; GCN-LABEL: name: s_lshr_b64
37+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
38+
; GCN-NEXT: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 undef %2:sreg_64, [[COPY]], implicit-def $scc
39+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
40+
%2:sreg_64 = S_LSHR_B64 undef %1:sreg_64, %0, implicit-def $scc
41+
...
42+
43+
---
44+
name: s_bfe_i64
45+
tracksRegLiveness: true
46+
body: |
47+
bb.0:
48+
49+
; GCN-LABEL: name: s_bfe_i64
50+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
51+
; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 undef %2:sreg_64, [[COPY]], implicit-def $scc
52+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
53+
%2:sreg_64 = S_BFE_I64 undef %1:sreg_64, %0, implicit-def $scc
54+
...
55+
56+
---
57+
name: s_bfe_u64
58+
tracksRegLiveness: true
59+
body: |
60+
bb.0:
61+
62+
; GCN-LABEL: name: s_bfe_u64
63+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
64+
; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 undef %2:sreg_64, [[COPY]], implicit-def $scc
65+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
66+
%2:sreg_64 = S_BFE_U64 undef %1:sreg_64, %0, implicit-def $scc
67+
...
68+
69+
---
70+
name: s_bfm_b64
71+
tracksRegLiveness: true
72+
body: |
73+
bb.0:
74+
75+
; GCN-LABEL: name: s_bfm_b64
76+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
77+
; GCN-NEXT: [[S_BFM_B64_:%[0-9]+]]:sreg_64 = S_BFM_B64 [[COPY]], 1, implicit-def $scc
78+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
79+
%1:sreg_64 = S_BFM_B64 %0, 1, implicit-def $scc
80+
...
81+
82+
---
83+
name: s_bitcmp0_b64
84+
tracksRegLiveness: true
85+
body: |
86+
bb.0:
87+
88+
; GCN-LABEL: name: s_bitcmp0_b64
89+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
90+
; GCN-NEXT: S_BITCMP0_B64 undef %1:sreg_64, [[COPY]], implicit undef $scc, implicit-def $scc
91+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
92+
S_BITCMP0_B64 undef %1:sreg_64, %0, implicit undef $scc, implicit-def $scc
93+
...
94+
95+
---
96+
name: s_bitcmp1_b64
97+
tracksRegLiveness: true
98+
body: |
99+
bb.0:
100+
101+
; GCN-LABEL: name: s_bitcmp1_b64
102+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
103+
; GCN-NEXT: S_BITCMP1_B64 undef %1:sreg_64, [[COPY]], implicit undef $scc, implicit-def $scc
104+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
105+
S_BITCMP1_B64 undef %1:sreg_64, %0, implicit undef $scc, implicit-def $scc
106+
...
107+
108+
---
109+
name: s_bitreplicate_b64_b32
110+
tracksRegLiveness: true
111+
body: |
112+
bb.0:
113+
114+
; GCN-LABEL: name: s_bitreplicate_b64_b32
115+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
116+
; GCN-NEXT: [[S_BITREPLICATE_B64_B32_:%[0-9]+]]:sreg_64 = S_BITREPLICATE_B64_B32 [[COPY]], implicit-def $scc
117+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
118+
%2:sreg_64 = S_BITREPLICATE_B64_B32 %0, implicit-def $scc
119+
...
120+
121+
---
122+
name: s_bitset0_b64
123+
tracksRegLiveness: true
124+
body: |
125+
bb.0:
126+
127+
; GCN-LABEL: name: s_bitset0_b64
128+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
129+
; GCN-NEXT: [[S_BITSET0_B64_:%[0-9]+]]:sreg_64 = S_BITSET0_B64 [[COPY]], undef [[S_BITSET0_B64_]], implicit-def $scc
130+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
131+
%1:sreg_64 = S_BITSET0_B64 %0, undef %1:sreg_64, implicit-def $scc
132+
...
133+
134+
---
135+
name: s_bitset1_b64
136+
tracksRegLiveness: true
137+
body: |
138+
bb.0:
139+
140+
; GCN-LABEL: name: s_bitset1_b64
141+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $src_flat_scratch_base_hi
142+
; GCN-NEXT: [[S_BITSET1_B64_:%[0-9]+]]:sreg_64 = S_BITSET1_B64 [[COPY]], undef [[S_BITSET1_B64_]], implicit-def $scc
143+
%0:sreg_32 = COPY $src_flat_scratch_base_hi
144+
%1:sreg_64 = S_BITSET1_B64 %0, undef %1:sreg_64, implicit-def $scc
145+
...

0 commit comments

Comments
 (0)