@@ -16,7 +16,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
1616; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
1717; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
1818; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
19- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
19+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
2020; SDAG-REAL16-NEXT: s_endpgm
2121;
2222; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -27,7 +27,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
2727; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
2828; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
2929; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s2
30- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
30+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
3131; SDAG-FAKE16-NEXT: s_endpgm
3232;
3333; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -38,7 +38,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
3838; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
3939; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
4040; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
41- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
41+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
4242; GISEL-REAL16-NEXT: s_endpgm
4343;
4444; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_v:
@@ -49,7 +49,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
4949; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
5050; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
5151; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2
52- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
52+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
5353; GISEL-FAKE16-NEXT: s_endpgm
5454 %cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8 (i32 %src ) #0
5555 store i16 %cvt , ptr %out , align 2
@@ -58,33 +58,21 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_v(i32 %src, ptr %out) #1 {
5858
5959define amdgpu_kernel void @sat_pk4_i4_i8_f32_s (i32 inreg %src , ptr %out ) #1 {
6060; SDAG-REAL16-LABEL: sat_pk4_i4_i8_f32_s:
61- ; SDAG-REAL16: ; %bb.1:
62- ; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0
63- ; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0)
64- ; SDAG-REAL16-NEXT: s_branch .LBB1_0
65- ; SDAG-REAL16-NEXT: .p2align 8
66- ; SDAG-REAL16-NEXT: ; %bb.2:
67- ; SDAG-REAL16-NEXT: .LBB1_0:
61+ ; SDAG-REAL16: ; %bb.0:
6862; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
6963; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s8
7064; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
7165; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
72- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
66+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
7367; SDAG-REAL16-NEXT: s_endpgm
7468;
7569; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_s:
76- ; SDAG-FAKE16: ; %bb.1:
77- ; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0
78- ; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
79- ; SDAG-FAKE16-NEXT: s_branch .LBB1_0
80- ; SDAG-FAKE16-NEXT: .p2align 8
81- ; SDAG-FAKE16-NEXT: ; %bb.2:
82- ; SDAG-FAKE16-NEXT: .LBB1_0:
70+ ; SDAG-FAKE16: ; %bb.0:
8371; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
8472; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
8573; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, s8
8674; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
87- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
75+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
8876; SDAG-FAKE16-NEXT: s_endpgm
8977;
9078; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_s:
@@ -95,7 +83,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
9583; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
9684; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
9785; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, s2
98- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
86+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
9987; GISEL-REAL16-NEXT: s_endpgm
10088;
10189; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_s:
@@ -106,7 +94,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_s(i32 inreg %src, ptr %out) #1 {
10694; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
10795; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
10896; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, s2
109- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
97+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
11098; GISEL-FAKE16-NEXT: s_endpgm
11199 %cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8 (i32 %src ) #0
112100 store i16 %cvt , ptr %out , align 2
@@ -120,7 +108,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
120108; SDAG-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
121109; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
122110; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
123- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
111+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
124112; SDAG-REAL16-NEXT: s_endpgm
125113;
126114; SDAG-FAKE16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -129,7 +117,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
129117; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
130118; SDAG-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v1, 0x64
131119; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
132- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
120+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
133121; SDAG-FAKE16-NEXT: s_endpgm
134122;
135123; GISEL-REAL16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -138,7 +126,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
138126; GISEL-REAL16-NEXT: v_sat_pk4_i4_i8_e32 v0.l, 0x64
139127; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
140128; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
141- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
129+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
142130; GISEL-REAL16-NEXT: s_endpgm
143131;
144132; GISEL-FAKE16-LABEL: sat_pk4_i4_i8_f32_i:
@@ -147,7 +135,7 @@ define amdgpu_kernel void @sat_pk4_i4_i8_f32_i(ptr %out) #1 {
147135; GISEL-FAKE16-NEXT: v_sat_pk4_i4_i8_e32 v0, 0x64
148136; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
149137; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
150- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
138+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
151139; GISEL-FAKE16-NEXT: s_endpgm
152140 %cvt = call i16 @llvm.amdgcn.sat.pk4.i4.i8 (i32 100 ) #0
153141 store i16 %cvt , ptr %out , align 2
@@ -163,7 +151,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
163151; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
164152; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
165153; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
166- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
154+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
167155; SDAG-REAL16-NEXT: s_endpgm
168156;
169157; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -174,7 +162,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
174162; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
175163; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
176164; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s2
177- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
165+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
178166; SDAG-FAKE16-NEXT: s_endpgm
179167;
180168; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -185,7 +173,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
185173; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
186174; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
187175; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
188- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
176+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
189177; GISEL-REAL16-NEXT: s_endpgm
190178;
191179; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_v:
@@ -196,7 +184,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
196184; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
197185; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
198186; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2
199- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
187+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
200188; GISEL-FAKE16-NEXT: s_endpgm
201189 %cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8 (i32 %src ) #0
202190 store i16 %cvt , ptr %out , align 2
@@ -205,33 +193,21 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_v(i32 %src, ptr %out) #1 {
205193
206194define amdgpu_kernel void @sat_pk4_u4_u8_f32_s (i32 inreg %src , ptr %out ) #1 {
207195; SDAG-REAL16-LABEL: sat_pk4_u4_u8_f32_s:
208- ; SDAG-REAL16: ; %bb.1:
209- ; SDAG-REAL16-NEXT: s_load_b32 s8, s[4:5], 0x0
210- ; SDAG-REAL16-NEXT: s_waitcnt lgkmcnt(0)
211- ; SDAG-REAL16-NEXT: s_branch .LBB4_0
212- ; SDAG-REAL16-NEXT: .p2align 8
213- ; SDAG-REAL16-NEXT: ; %bb.2:
214- ; SDAG-REAL16-NEXT: .LBB4_0:
196+ ; SDAG-REAL16: ; %bb.0:
215197; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
216198; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s8
217199; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
218200; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
219- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
201+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
220202; SDAG-REAL16-NEXT: s_endpgm
221203;
222204; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_s:
223- ; SDAG-FAKE16: ; %bb.1:
224- ; SDAG-FAKE16-NEXT: s_load_b32 s8, s[4:5], 0x0
225- ; SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
226- ; SDAG-FAKE16-NEXT: s_branch .LBB4_0
227- ; SDAG-FAKE16-NEXT: .p2align 8
228- ; SDAG-FAKE16-NEXT: ; %bb.2:
229- ; SDAG-FAKE16-NEXT: .LBB4_0:
205+ ; SDAG-FAKE16: ; %bb.0:
230206; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
231207; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
232208; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, s8
233209; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
234- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
210+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
235211; SDAG-FAKE16-NEXT: s_endpgm
236212;
237213; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_s:
@@ -242,7 +218,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
242218; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
243219; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
244220; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, s2
245- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
221+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
246222; GISEL-REAL16-NEXT: s_endpgm
247223;
248224; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_s:
@@ -253,7 +229,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_s(i32 inreg %src, ptr %out) #1 {
253229; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
254230; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
255231; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, s2
256- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
232+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
257233; GISEL-FAKE16-NEXT: s_endpgm
258234 %cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8 (i32 %src ) #0
259235 store i16 %cvt , ptr %out , align 2
@@ -267,7 +243,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
267243; SDAG-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
268244; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
269245; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
270- ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
246+ ; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
271247; SDAG-REAL16-NEXT: s_endpgm
272248;
273249; SDAG-FAKE16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -276,7 +252,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
276252; SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, 0
277253; SDAG-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v1, 0x64
278254; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
279- ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1]
255+ ; SDAG-FAKE16-NEXT: flat_store_b16 v0, v1, s[0:1] scope:SCOPE_SE
280256; SDAG-FAKE16-NEXT: s_endpgm
281257;
282258; GISEL-REAL16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -285,7 +261,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
285261; GISEL-REAL16-NEXT: v_sat_pk4_u4_u8_e32 v0.l, 0x64
286262; GISEL-REAL16-NEXT: v_mov_b32_e32 v1, 0
287263; GISEL-REAL16-NEXT: s_wait_kmcnt 0x0
288- ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
264+ ; GISEL-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
289265; GISEL-REAL16-NEXT: s_endpgm
290266;
291267; GISEL-FAKE16-LABEL: sat_pk4_u4_u8_f32_i:
@@ -294,7 +270,7 @@ define amdgpu_kernel void @sat_pk4_u4_u8_f32_i(ptr %out) #1 {
294270; GISEL-FAKE16-NEXT: v_sat_pk4_u4_u8_e32 v0, 0x64
295271; GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0
296272; GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
297- ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1]
273+ ; GISEL-FAKE16-NEXT: flat_store_b16 v1, v0, s[0:1] scope:SCOPE_SE
298274; GISEL-FAKE16-NEXT: s_endpgm
299275 %cvt = call i16 @llvm.amdgcn.sat.pk4.u4.u8 (i32 100 ) #0
300276 store i16 %cvt , ptr %out , align 2
0 commit comments