@@ -16,7 +16,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
16
16
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
17
17
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
18
18
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
19
- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
19
+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
20
20
; SDAG-REAL16-NEXT: s_endpgm
21
21
;
22
22
; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -27,7 +27,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
27
27
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
28
28
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
29
29
; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s2
30
- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
30
+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
31
31
; SDAG-FAKE16-NEXT: s_endpgm
32
32
;
33
33
; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -38,7 +38,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
38
38
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
39
39
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
40
40
; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
41
- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
41
+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
42
42
; GISEL-REAL16-NEXT: s_endpgm
43
43
;
44
44
; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -49,7 +49,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
49
49
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
50
50
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
51
51
; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2
52
- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
52
+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
53
53
; GISEL-FAKE16-NEXT: s_endpgm
54
54
%cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8 (i32 %src ) #0
55
55
store i16 %cvt , ptr %out , align 2
@@ -58,33 +58,21 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
58
58
59
59
define amdgpu_kernel void @sat_pk4_i4_i8_f32_s (i32 inreg %src , ptr %out ) #1 {
60
60
; SDAG-REAL16-LABEL: sat_pk4_i4_i8_f32_s:
61
- ; SDAG-REAL16: ; %bb.1:
62
- ; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0
63
- ; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0)
64
- ; SDAG-REAL16-NEXT: s_branch .LBB1_0
65
- ; SDAG-REAL16-NEXT: .p2align 8
66
- ; SDAG-REAL16-NEXT: ; %bb.2:
67
- ; SDAG-REAL16-NEXT: .LBB1_0:
61
+ ; SDAG-REAL16: ; %bb.0:
68
62
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
69
63
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8
70
64
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
71
65
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
72
- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
66
+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
73
67
; SDAG-REAL16-NEXT: s_endpgm
74
68
;
75
69
; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_s:
76
- ; SDAG-FAKE16: ; %bb.1:
77
- ; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0
78
- ; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
79
- ; SDAG-FAKE16-NEXT: s_branch .LBB1_0
80
- ; SDAG-FAKE16-NEXT: .p2align 8
81
- ; SDAG-FAKE16-NEXT: ; %bb.2:
82
- ; SDAG-FAKE16-NEXT: .LBB1_0:
70
+ ; SDAG-FAKE16: ; %bb.0:
83
71
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
84
72
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
85
73
; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s8
86
74
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
87
- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
75
+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
88
76
; SDAG-FAKE16-NEXT: s_endpgm
89
77
;
90
78
; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_s:
@@ -95,7 +83,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
95
83
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
96
84
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
97
85
; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
98
- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
86
+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
99
87
; GISEL-REAL16-NEXT: s_endpgm
100
88
;
101
89
; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_s:
@@ -106,7 +94,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
106
94
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
107
95
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
108
96
; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2
109
- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
97
+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
110
98
; GISEL-FAKE16-NEXT: s_endpgm
111
99
%cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8 (i32 %src ) #0
112
100
store i16 %cvt , ptr %out , align 2
@@ -120,7 +108,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
120
108
; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
121
109
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
122
110
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
123
- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
111
+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
124
112
; SDAG-REAL16-NEXT: s_endpgm
125
113
;
126
114
; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -129,7 +117,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
129
117
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
130
118
; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, 0x64
131
119
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
132
- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
120
+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
133
121
; SDAG-FAKE16-NEXT: s_endpgm
134
122
;
135
123
; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -138,7 +126,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
138
126
; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
139
127
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
140
128
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
141
- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
129
+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
142
130
; GISEL-REAL16-NEXT: s_endpgm
143
131
;
144
132
; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -147,7 +135,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
147
135
; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, 0x64
148
136
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
149
137
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
150
- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
138
+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
151
139
; GISEL-FAKE16-NEXT: s_endpgm
152
140
%cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8 (i32 100 ) #0
153
141
store i16 %cvt , ptr %out , align 2
@@ -163,7 +151,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
163
151
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
164
152
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
165
153
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
166
- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
154
+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
167
155
; SDAG-REAL16-NEXT: s_endpgm
168
156
;
169
157
; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -174,7 +162,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
174
162
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
175
163
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
176
164
; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s2
177
- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
165
+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
178
166
; SDAG-FAKE16-NEXT: s_endpgm
179
167
;
180
168
; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -185,7 +173,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
185
173
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
186
174
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
187
175
; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
188
- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
176
+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
189
177
; GISEL-REAL16-NEXT: s_endpgm
190
178
;
191
179
; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -196,7 +184,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
196
184
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
197
185
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
198
186
; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2
199
- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
187
+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
200
188
; GISEL-FAKE16-NEXT: s_endpgm
201
189
%cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8 (i32 %src ) #0
202
190
store i16 %cvt , ptr %out , align 2
@@ -205,33 +193,21 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
205
193
206
194
define amdgpu_kernel void @sat_pk4_u4_u8_f32_s (i32 inreg %src , ptr %out ) #1 {
207
195
; SDAG-REAL16-LABEL: sat_pk4_u4_u8_f32_s:
208
- ; SDAG-REAL16: ; %bb.1:
209
- ; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0
210
- ; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0)
211
- ; SDAG-REAL16-NEXT: s_branch .LBB4_0
212
- ; SDAG-REAL16-NEXT: .p2align 8
213
- ; SDAG-REAL16-NEXT: ; %bb.2:
214
- ; SDAG-REAL16-NEXT: .LBB4_0:
196
+ ; SDAG-REAL16: ; %bb.0:
215
197
; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
216
198
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8
217
199
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
218
200
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
219
- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
201
+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
220
202
; SDAG-REAL16-NEXT: s_endpgm
221
203
;
222
204
; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_s:
223
- ; SDAG-FAKE16: ; %bb.1:
224
- ; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0
225
- ; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
226
- ; SDAG-FAKE16-NEXT: s_branch .LBB4_0
227
- ; SDAG-FAKE16-NEXT: .p2align 8
228
- ; SDAG-FAKE16-NEXT: ; %bb.2:
229
- ; SDAG-FAKE16-NEXT: .LBB4_0:
205
+ ; SDAG-FAKE16: ; %bb.0:
230
206
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
231
207
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
232
208
; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s8
233
209
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
234
- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
210
+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
235
211
; SDAG-FAKE16-NEXT: s_endpgm
236
212
;
237
213
; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_s:
@@ -242,7 +218,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
242
218
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
243
219
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
244
220
; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
245
- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
221
+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
246
222
; GISEL-REAL16-NEXT: s_endpgm
247
223
;
248
224
; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_s:
@@ -253,7 +229,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
253
229
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
254
230
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
255
231
; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2
256
- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
232
+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
257
233
; GISEL-FAKE16-NEXT: s_endpgm
258
234
%cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8 (i32 %src ) #0
259
235
store i16 %cvt , ptr %out , align 2
@@ -267,7 +243,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
267
243
; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
268
244
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
269
245
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
270
- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
246
+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
271
247
; SDAG-REAL16-NEXT: s_endpgm
272
248
;
273
249
; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -276,7 +252,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
276
252
; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
277
253
; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, 0x64
278
254
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
279
- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
255
+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
280
256
; SDAG-FAKE16-NEXT: s_endpgm
281
257
;
282
258
; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -285,7 +261,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
285
261
; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
286
262
; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
287
263
; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
288
- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
264
+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
289
265
; GISEL-REAL16-NEXT: s_endpgm
290
266
;
291
267
; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -294,7 +270,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
294
270
; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, 0x64
295
271
; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
296
272
; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
297
- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
273
+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
298
274
; GISEL-FAKE16-NEXT: s_endpgm
299
275
%cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8 (i32 100 ) #0
300
276
store i16 %cvt , ptr %out , align 2
0 commit comments