@@ -171,31 +171,30 @@ define void @v_shuffle_v2f32_v2f32__3_0(ptr addrspace(1) inreg %ptr) {
171171; GFX90A: ; %bb.0:
172172; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173173; GFX90A-NEXT: ;;#ASMSTART
174- ; GFX90A-NEXT: ; def v[2:3 ]
174+ ; GFX90A-NEXT: ; def v[0:1 ]
175175; GFX90A-NEXT: ;;#ASMEND
176176; GFX90A-NEXT: v_mov_b32_e32 v4, 0
177177; GFX90A-NEXT: ;;#ASMSTART
178- ; GFX90A-NEXT: ; def v[0:1 ]
178+ ; GFX90A-NEXT: ; def v[2:3 ]
179179; GFX90A-NEXT: ;;#ASMEND
180- ; GFX90A-NEXT: v_mov_b32_e32 v2, v3
181- ; GFX90A-NEXT: v_mov_b32_e32 v3, v0
182- ; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
180+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
181+ ; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
183182; GFX90A-NEXT: s_waitcnt vmcnt(0)
184183; GFX90A-NEXT: s_setpc_b64 s[30:31]
185184;
186185; GFX940-LABEL: v_shuffle_v2f32_v2f32__3_0:
187186; GFX940: ; %bb.0:
188187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189188; GFX940-NEXT: ;;#ASMSTART
190- ; GFX940-NEXT: ; def v[2:3 ]
189+ ; GFX940-NEXT: ; def v[0:1 ]
191190; GFX940-NEXT: ;;#ASMEND
192191; GFX940-NEXT: v_mov_b32_e32 v4, 0
193192; GFX940-NEXT: ;;#ASMSTART
194- ; GFX940-NEXT: ; def v[0:1 ]
193+ ; GFX940-NEXT: ; def v[2:3 ]
195194; GFX940-NEXT: ;;#ASMEND
196- ; GFX940-NEXT: v_mov_b32_e32 v2, v3
197- ; GFX940-NEXT: v_mov_b32_e32 v3, v0
198- ; GFX940-NEXT: global_store_dwordx2 v4, v[2:3 ], s[0:1] sc0 sc1
195+ ; GFX940-NEXT: s_nop 0
196+ ; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[0:1] op_sel:[1,0]
197+ ; GFX940-NEXT: global_store_dwordx2 v4, v[0:1 ], s[0:1] sc0 sc1
199198; GFX940-NEXT: s_waitcnt vmcnt(0)
200199; GFX940-NEXT: s_setpc_b64 s[30:31]
201200 %vec0 = call <2 x float > asm "; def $0" , "=v" ()
@@ -274,27 +273,24 @@ define void @v_shuffle_v2f32_v2f32__3_2(ptr addrspace(1) inreg %ptr) {
274273; GFX90A-LABEL: v_shuffle_v2f32_v2f32__3_2:
275274; GFX90A: ; %bb.0:
276275; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277- ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
278276; GFX90A-NEXT: ;;#ASMSTART
279277; GFX90A-NEXT: ; def v[0:1]
280278; GFX90A-NEXT: ;;#ASMEND
281- ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
282- ; GFX90A-NEXT: v_mov_b32_e32 v3, v0
283- ; GFX90A-NEXT: global_store_dwordx2 v4 , v[2:3 ], s[16:17]
279+ ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
280+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
281+ ; GFX90A-NEXT: global_store_dwordx2 v2 , v[0:1 ], s[16:17]
284282; GFX90A-NEXT: s_waitcnt vmcnt(0)
285283; GFX90A-NEXT: s_setpc_b64 s[30:31]
286284;
287285; GFX940-LABEL: v_shuffle_v2f32_v2f32__3_2:
288286; GFX940: ; %bb.0:
289287; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290- ; GFX940-NEXT: v_mov_b32_e32 v4, 0
291288; GFX940-NEXT: ;;#ASMSTART
292289; GFX940-NEXT: ; def v[0:1]
293290; GFX940-NEXT: ;;#ASMEND
294- ; GFX940-NEXT: s_nop 0
295- ; GFX940-NEXT: v_mov_b32_e32 v2, v1
296- ; GFX940-NEXT: v_mov_b32_e32 v3, v0
297- ; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
291+ ; GFX940-NEXT: v_mov_b32_e32 v2, 0
292+ ; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
293+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
298294; GFX940-NEXT: s_waitcnt vmcnt(0)
299295; GFX940-NEXT: s_setpc_b64 s[30:31]
300296 %vec0 = call <2 x float > asm "; def $0" , "=v" ()
@@ -447,27 +443,24 @@ define void @v_shuffle_v2f32_v2f32__1_0(ptr addrspace(1) inreg %ptr) {
447443; GFX90A-LABEL: v_shuffle_v2f32_v2f32__1_0:
448444; GFX90A: ; %bb.0:
449445; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450- ; GFX90A-NEXT: v_mov_b32_e32 v4, 0
451446; GFX90A-NEXT: ;;#ASMSTART
452447; GFX90A-NEXT: ; def v[0:1]
453448; GFX90A-NEXT: ;;#ASMEND
454- ; GFX90A-NEXT: v_mov_b32_e32 v2, v1
455- ; GFX90A-NEXT: v_mov_b32_e32 v3, v0
456- ; GFX90A-NEXT: global_store_dwordx2 v4 , v[2:3 ], s[16:17]
449+ ; GFX90A-NEXT: v_mov_b32_e32 v2, 0
450+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
451+ ; GFX90A-NEXT: global_store_dwordx2 v2 , v[0:1 ], s[16:17]
457452; GFX90A-NEXT: s_waitcnt vmcnt(0)
458453; GFX90A-NEXT: s_setpc_b64 s[30:31]
459454;
460455; GFX940-LABEL: v_shuffle_v2f32_v2f32__1_0:
461456; GFX940: ; %bb.0:
462457; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463- ; GFX940-NEXT: v_mov_b32_e32 v4, 0
464458; GFX940-NEXT: ;;#ASMSTART
465459; GFX940-NEXT: ; def v[0:1]
466460; GFX940-NEXT: ;;#ASMEND
467- ; GFX940-NEXT: s_nop 0
468- ; GFX940-NEXT: v_mov_b32_e32 v2, v1
469- ; GFX940-NEXT: v_mov_b32_e32 v3, v0
470- ; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
461+ ; GFX940-NEXT: v_mov_b32_e32 v2, 0
462+ ; GFX940-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[0:1] op_sel:[1,0]
463+ ; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
471464; GFX940-NEXT: s_waitcnt vmcnt(0)
472465; GFX940-NEXT: s_setpc_b64 s[30:31]
473466 %vec0 = call <2 x float > asm "; def $0" , "=v" ()
0 commit comments