@@ -1228,51 +1228,49 @@ define void @v_shuffle_v3bf16_v2bf16__3_u_1(ptr addrspace(1) inreg %ptr) {
12281228; GFX900-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
12291229; GFX900: ; %bb.0:
12301230; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1231+ ; GFX900-NEXT: v_mov_b32_e32 v0, 0
12311232; GFX900-NEXT: ;;#ASMSTART
12321233; GFX900-NEXT: ; def v1
12331234; GFX900-NEXT: ;;#ASMEND
12341235; GFX900-NEXT: ;;#ASMSTART
12351236; GFX900-NEXT: ; def v2
12361237; GFX900-NEXT: ;;#ASMEND
1237- ; GFX900-NEXT: v_mov_b32_e32 v0, 0
12381238; GFX900-NEXT: v_alignbit_b32 v2, s4, v2, 16
1239- ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1239+ ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
12401240; GFX900-NEXT: global_store_dword v0, v2, s[16:17]
1241- ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
12421241; GFX900-NEXT: s_waitcnt vmcnt(0)
12431242; GFX900-NEXT: s_setpc_b64 s[30:31]
12441243;
12451244; GFX90A-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
12461245; GFX90A: ; %bb.0:
12471246; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247+ ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
12481248; GFX90A-NEXT: ;;#ASMSTART
12491249; GFX90A-NEXT: ; def v1
12501250; GFX90A-NEXT: ;;#ASMEND
12511251; GFX90A-NEXT: ;;#ASMSTART
12521252; GFX90A-NEXT: ; def v2
12531253; GFX90A-NEXT: ;;#ASMEND
1254- ; GFX90A-NEXT: v_mov_b32_e32 v0, 0
12551254; GFX90A-NEXT: v_alignbit_b32 v2, s4, v2, 16
1256- ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1255+ ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
12571256; GFX90A-NEXT: global_store_dword v0, v2, s[16:17]
1258- ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
12591257; GFX90A-NEXT: s_waitcnt vmcnt(0)
12601258; GFX90A-NEXT: s_setpc_b64 s[30:31]
12611259;
12621260; GFX942-LABEL: v_shuffle_v3bf16_v2bf16__3_u_1:
12631261; GFX942: ; %bb.0:
12641262; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1263+ ; GFX942-NEXT: v_mov_b32_e32 v0, 0
12651264; GFX942-NEXT: ;;#ASMSTART
12661265; GFX942-NEXT: ; def v1
12671266; GFX942-NEXT: ;;#ASMEND
12681267; GFX942-NEXT: ;;#ASMSTART
12691268; GFX942-NEXT: ; def v2
12701269; GFX942-NEXT: ;;#ASMEND
1271- ; GFX942-NEXT: v_mov_b32_e32 v0, 0
1270+ ; GFX942-NEXT: s_nop 0
12721271; GFX942-NEXT: v_alignbit_b32 v2, s0, v2, 16
1273- ; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1272+ ; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4
12741273; GFX942-NEXT: global_store_dword v0, v2, s[0:1]
1275- ; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4
12761274; GFX942-NEXT: s_waitcnt vmcnt(0)
12771275; GFX942-NEXT: s_setpc_b64 s[30:31]
12781276 %vec0 = call <2 x bfloat> asm "; def $0" , "=v" ()
0 commit comments