Skip to content

Commit 8bce10a

Browse files
authored
[AMDGPU] Enable kernarg preload on gfx1250 (#153686)
1 parent ffe4870 commit 8bce10a

File tree

5 files changed

+531
-54
lines changed

5 files changed

+531
-54
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ AMDGPUPreloadKernArgProlog::AMDGPUPreloadKernArgProlog(MachineFunction &MF)
109109
TRI(*ST.getRegisterInfo()) {}
110110

111111
bool AMDGPUPreloadKernArgProlog::run() {
112-
if (!ST.hasKernargPreload())
112+
if (!ST.needsKernArgPreloadProlog())
113113
return false;
114114

115115
unsigned NumKernArgPreloadSGPRs = MFI.getNumKernargPreloadedSGPRs();

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1577,6 +1577,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
15771577
// extended VA to 57 bits.
15781578
bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; }
15791579

1580+
// \returns true if the target needs to create a prolog for backward
1581+
// compatibility when preloading kernel arguments.
1582+
bool needsKernArgPreloadProlog() const {
1583+
return hasKernargPreload() && !GFX1250Insts;
1584+
}
1585+
15801586
/// \returns SGPR allocation granularity supported by the subtarget.
15811587
unsigned getSGPRAllocGranule() const {
15821588
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.sat.pk.ll

Lines changed: 28 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
1616
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
1717
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
1818
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
19-
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
19+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
2020
; SDAG-REAL16-NEXT: s_endpgm
2121
;
2222
; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -27,7 +27,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
2727
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
2828
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
2929
; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s2
30-
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
30+
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
3131
; SDAG-FAKE16-NEXT: s_endpgm
3232
;
3333
; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -38,7 +38,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
3838
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
3939
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
4040
; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
41-
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
41+
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
4242
; GISEL-REAL16-NEXT: s_endpgm
4343
;
4444
; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -49,7 +49,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
4949
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
5050
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
5151
; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2
52-
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
52+
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
5353
; GISEL-FAKE16-NEXT: s_endpgm
5454
%cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 %src) #0
5555
store i16 %cvt, ptr %out, align 2
@@ -58,33 +58,21 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
5858

5959
define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
6060
; SDAG-REAL16-LABEL: sat_pk4_i4_i8_f32_s:
61-
; SDAG-REAL16: ; %bb.1:
62-
; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0
63-
; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0)
64-
; SDAG-REAL16-NEXT: s_branch .LBB1_0
65-
; SDAG-REAL16-NEXT: .p2align 8
66-
; SDAG-REAL16-NEXT: ; %bb.2:
67-
; SDAG-REAL16-NEXT: .LBB1_0:
61+
; SDAG-REAL16: ; %bb.0:
6862
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
6963
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8
7064
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
7165
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
72-
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
66+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
7367
; SDAG-REAL16-NEXT: s_endpgm
7468
;
7569
; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_s:
76-
; SDAG-FAKE16: ; %bb.1:
77-
; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0
78-
; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
79-
; SDAG-FAKE16-NEXT: s_branch .LBB1_0
80-
; SDAG-FAKE16-NEXT: .p2align 8
81-
; SDAG-FAKE16-NEXT: ; %bb.2:
82-
; SDAG-FAKE16-NEXT: .LBB1_0:
70+
; SDAG-FAKE16: ; %bb.0:
8371
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
8472
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
8573
; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s8
8674
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
87-
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
75+
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
8876
; SDAG-FAKE16-NEXT: s_endpgm
8977
;
9078
; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_s:
@@ -95,7 +83,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
9583
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
9684
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
9785
; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
98-
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
86+
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
9987
; GISEL-REAL16-NEXT: s_endpgm
10088
;
10189
; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_s:
@@ -106,7 +94,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
10694
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
10795
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
10896
; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2
109-
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
97+
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
11098
; GISEL-FAKE16-NEXT: s_endpgm
11199
%cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 %src) #0
112100
store i16 %cvt, ptr %out, align 2
@@ -120,7 +108,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
120108
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
121109
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
122110
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
123-
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
111+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
124112
; SDAG-REAL16-NEXT: s_endpgm
125113
;
126114
; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -129,7 +117,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
129117
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
130118
; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, 0x64
131119
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
132-
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
120+
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
133121
; SDAG-FAKE16-NEXT: s_endpgm
134122
;
135123
; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -138,7 +126,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
138126
; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
139127
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
140128
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
141-
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
129+
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
142130
; GISEL-REAL16-NEXT: s_endpgm
143131
;
144132
; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -147,7 +135,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
147135
; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, 0x64
148136
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
149137
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
150-
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
138+
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
151139
; GISEL-FAKE16-NEXT: s_endpgm
152140
%cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8(i32 100) #0
153141
store i16 %cvt, ptr %out, align 2
@@ -163,7 +151,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
163151
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
164152
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
165153
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
166-
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
154+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
167155
; SDAG-REAL16-NEXT: s_endpgm
168156
;
169157
; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -174,7 +162,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
174162
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
175163
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
176164
; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s2
177-
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
165+
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
178166
; SDAG-FAKE16-NEXT: s_endpgm
179167
;
180168
; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -185,7 +173,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
185173
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
186174
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
187175
; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
188-
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
176+
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
189177
; GISEL-REAL16-NEXT: s_endpgm
190178
;
191179
; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -196,7 +184,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
196184
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
197185
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
198186
; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2
199-
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
187+
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
200188
; GISEL-FAKE16-NEXT: s_endpgm
201189
%cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 %src) #0
202190
store i16 %cvt, ptr %out, align 2
@@ -205,33 +193,21 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
205193

206194
define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
207195
; SDAG-REAL16-LABEL: sat_pk4_u4_u8_f32_s:
208-
; SDAG-REAL16: ; %bb.1:
209-
; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0
210-
; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0)
211-
; SDAG-REAL16-NEXT: s_branch .LBB4_0
212-
; SDAG-REAL16-NEXT: .p2align 8
213-
; SDAG-REAL16-NEXT: ; %bb.2:
214-
; SDAG-REAL16-NEXT: .LBB4_0:
196+
; SDAG-REAL16: ; %bb.0:
215197
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
216198
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8
217199
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
218200
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
219-
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
201+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
220202
; SDAG-REAL16-NEXT: s_endpgm
221203
;
222204
; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_s:
223-
; SDAG-FAKE16: ; %bb.1:
224-
; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0
225-
; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
226-
; SDAG-FAKE16-NEXT: s_branch .LBB4_0
227-
; SDAG-FAKE16-NEXT: .p2align 8
228-
; SDAG-FAKE16-NEXT: ; %bb.2:
229-
; SDAG-FAKE16-NEXT: .LBB4_0:
205+
; SDAG-FAKE16: ; %bb.0:
230206
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
231207
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
232208
; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s8
233209
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
234-
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
210+
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
235211
; SDAG-FAKE16-NEXT: s_endpgm
236212
;
237213
; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_s:
@@ -242,7 +218,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
242218
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
243219
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
244220
; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
245-
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
221+
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
246222
; GISEL-REAL16-NEXT: s_endpgm
247223
;
248224
; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_s:
@@ -253,7 +229,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
253229
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
254230
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
255231
; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2
256-
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
232+
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
257233
; GISEL-FAKE16-NEXT: s_endpgm
258234
%cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 %src) #0
259235
store i16 %cvt, ptr %out, align 2
@@ -267,7 +243,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
267243
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
268244
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
269245
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
270-
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
246+
; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
271247
; SDAG-REAL16-NEXT: s_endpgm
272248
;
273249
; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -276,7 +252,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
276252
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
277253
; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, 0x64
278254
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
279-
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
255+
; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
280256
; SDAG-FAKE16-NEXT: s_endpgm
281257
;
282258
; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -285,7 +261,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
285261
; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
286262
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
287263
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
288-
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
264+
; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
289265
; GISEL-REAL16-NEXT: s_endpgm
290266
;
291267
; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -294,7 +270,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
294270
; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, 0x64
295271
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
296272
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
297-
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
273+
; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
298274
; GISEL-FAKE16-NEXT: s_endpgm
299275
%cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8(i32 100) #0
300276
store i16 %cvt, ptr %out, align 2

0 commit comments

Comments
 (0)