@@ -985,9 +985,8 @@ define void @v_shuffle_v3bf16_v2bf16__u_1_1(ptr addrspace(1) inreg %ptr) {
985985; GFX900-NEXT: ;;#ASMSTART
986986; GFX900-NEXT: ; def v1
987987; GFX900-NEXT: ;;#ASMEND
988+ ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
988989; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
989- ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
990- ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
991990; GFX900-NEXT: s_waitcnt vmcnt(0)
992991; GFX900-NEXT: s_setpc_b64 s[30:31]
993992;
@@ -998,9 +997,8 @@ define void @v_shuffle_v3bf16_v2bf16__u_1_1(ptr addrspace(1) inreg %ptr) {
998997; GFX90A-NEXT: ;;#ASMSTART
999998; GFX90A-NEXT: ; def v1
1000999; GFX90A-NEXT: ;;#ASMEND
1000+ ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
10011001; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1002- ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1003- ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
10041002; GFX90A-NEXT: s_waitcnt vmcnt(0)
10051003; GFX90A-NEXT: s_setpc_b64 s[30:31]
10061004;
@@ -1011,9 +1009,8 @@ define void @v_shuffle_v3bf16_v2bf16__u_1_1(ptr addrspace(1) inreg %ptr) {
10111009; GFX942-NEXT: ;;#ASMSTART
10121010; GFX942-NEXT: ; def v1
10131011; GFX942-NEXT: ;;#ASMEND
1012+ ; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4
10141013; GFX942-NEXT: global_store_dword v0, v1, s[0:1]
1015- ; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1016- ; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4
10171014; GFX942-NEXT: s_waitcnt vmcnt(0)
10181015; GFX942-NEXT: s_setpc_b64 s[30:31]
10191016 %vec0 = call <2 x bfloat> asm "; def $0" , "=v" ()
@@ -1030,9 +1027,8 @@ define void @v_shuffle_v3bf16_v2bf16__0_1_1(ptr addrspace(1) inreg %ptr) {
10301027; GFX900-NEXT: ;;#ASMSTART
10311028; GFX900-NEXT: ; def v1
10321029; GFX900-NEXT: ;;#ASMEND
1030+ ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
10331031; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1034- ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1035- ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
10361032; GFX900-NEXT: s_waitcnt vmcnt(0)
10371033; GFX900-NEXT: s_setpc_b64 s[30:31]
10381034;
@@ -1043,9 +1039,8 @@ define void @v_shuffle_v3bf16_v2bf16__0_1_1(ptr addrspace(1) inreg %ptr) {
10431039; GFX90A-NEXT: ;;#ASMSTART
10441040; GFX90A-NEXT: ; def v1
10451041; GFX90A-NEXT: ;;#ASMEND
1042+ ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
10461043; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1047- ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1048- ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
10491044; GFX90A-NEXT: s_waitcnt vmcnt(0)
10501045; GFX90A-NEXT: s_setpc_b64 s[30:31]
10511046;
@@ -1056,9 +1051,8 @@ define void @v_shuffle_v3bf16_v2bf16__0_1_1(ptr addrspace(1) inreg %ptr) {
10561051; GFX942-NEXT: ;;#ASMSTART
10571052; GFX942-NEXT: ; def v1
10581053; GFX942-NEXT: ;;#ASMEND
1054+ ; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4
10591055; GFX942-NEXT: global_store_dword v0, v1, s[0:1]
1060- ; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1061- ; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4
10621056; GFX942-NEXT: s_waitcnt vmcnt(0)
10631057; GFX942-NEXT: s_setpc_b64 s[30:31]
10641058 %vec0 = call <2 x bfloat> asm "; def $0" , "=v" ()
@@ -1126,9 +1120,8 @@ define void @v_shuffle_v3bf16_v2bf16__2_1_1(ptr addrspace(1) inreg %ptr) {
11261120; GFX900-NEXT: ;;#ASMSTART
11271121; GFX900-NEXT: ; def v1
11281122; GFX900-NEXT: ;;#ASMEND
1123+ ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
11291124; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1130- ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1131- ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
11321125; GFX900-NEXT: s_waitcnt vmcnt(0)
11331126; GFX900-NEXT: s_setpc_b64 s[30:31]
11341127;
@@ -1139,9 +1132,8 @@ define void @v_shuffle_v3bf16_v2bf16__2_1_1(ptr addrspace(1) inreg %ptr) {
11391132; GFX90A-NEXT: ;;#ASMSTART
11401133; GFX90A-NEXT: ; def v1
11411134; GFX90A-NEXT: ;;#ASMEND
1135+ ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
11421136; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1143- ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1144- ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
11451137; GFX90A-NEXT: s_waitcnt vmcnt(0)
11461138; GFX90A-NEXT: s_setpc_b64 s[30:31]
11471139;
@@ -1152,9 +1144,8 @@ define void @v_shuffle_v3bf16_v2bf16__2_1_1(ptr addrspace(1) inreg %ptr) {
11521144; GFX942-NEXT: ;;#ASMSTART
11531145; GFX942-NEXT: ; def v1
11541146; GFX942-NEXT: ;;#ASMEND
1147+ ; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4
11551148; GFX942-NEXT: global_store_dword v0, v1, s[0:1]
1156- ; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1157- ; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4
11581149; GFX942-NEXT: s_waitcnt vmcnt(0)
11591150; GFX942-NEXT: s_setpc_b64 s[30:31]
11601151 %vec0 = call <2 x bfloat> asm "; def $0" , "=v" ()
@@ -1713,9 +1704,8 @@ define void @v_shuffle_v3bf16_v2bf16__u_3_3(ptr addrspace(1) inreg %ptr) {
17131704; GFX900-NEXT: ;;#ASMSTART
17141705; GFX900-NEXT: ; def v1
17151706; GFX900-NEXT: ;;#ASMEND
1707+ ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
17161708; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1717- ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1718- ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
17191709; GFX900-NEXT: s_waitcnt vmcnt(0)
17201710; GFX900-NEXT: s_setpc_b64 s[30:31]
17211711;
@@ -1726,9 +1716,8 @@ define void @v_shuffle_v3bf16_v2bf16__u_3_3(ptr addrspace(1) inreg %ptr) {
17261716; GFX90A-NEXT: ;;#ASMSTART
17271717; GFX90A-NEXT: ; def v1
17281718; GFX90A-NEXT: ;;#ASMEND
1719+ ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
17291720; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1730- ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1731- ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
17321721; GFX90A-NEXT: s_waitcnt vmcnt(0)
17331722; GFX90A-NEXT: s_setpc_b64 s[30:31]
17341723;
@@ -1739,9 +1728,8 @@ define void @v_shuffle_v3bf16_v2bf16__u_3_3(ptr addrspace(1) inreg %ptr) {
17391728; GFX942-NEXT: ;;#ASMSTART
17401729; GFX942-NEXT: ; def v1
17411730; GFX942-NEXT: ;;#ASMEND
1731+ ; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4
17421732; GFX942-NEXT: global_store_dword v0, v1, s[0:1]
1743- ; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1744- ; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4
17451733; GFX942-NEXT: s_waitcnt vmcnt(0)
17461734; GFX942-NEXT: s_setpc_b64 s[30:31]
17471735 %vec0 = call <2 x bfloat> asm "; def $0" , "=v" ()
@@ -1882,9 +1870,8 @@ define void @v_shuffle_v3bf16_v2bf16__2_3_3(ptr addrspace(1) inreg %ptr) {
18821870; GFX900-NEXT: ;;#ASMSTART
18831871; GFX900-NEXT: ; def v1
18841872; GFX900-NEXT: ;;#ASMEND
1873+ ; GFX900-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
18851874; GFX900-NEXT: global_store_dword v0, v1, s[16:17]
1886- ; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1887- ; GFX900-NEXT: global_store_short v0, v1, s[16:17] offset:4
18881875; GFX900-NEXT: s_waitcnt vmcnt(0)
18891876; GFX900-NEXT: s_setpc_b64 s[30:31]
18901877;
@@ -1895,9 +1882,8 @@ define void @v_shuffle_v3bf16_v2bf16__2_3_3(ptr addrspace(1) inreg %ptr) {
18951882; GFX90A-NEXT: ;;#ASMSTART
18961883; GFX90A-NEXT: ; def v1
18971884; GFX90A-NEXT: ;;#ASMEND
1885+ ; GFX90A-NEXT: global_store_short_d16_hi v0, v1, s[16:17] offset:4
18981886; GFX90A-NEXT: global_store_dword v0, v1, s[16:17]
1899- ; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1900- ; GFX90A-NEXT: global_store_short v0, v1, s[16:17] offset:4
19011887; GFX90A-NEXT: s_waitcnt vmcnt(0)
19021888; GFX90A-NEXT: s_setpc_b64 s[30:31]
19031889;
@@ -1908,9 +1894,8 @@ define void @v_shuffle_v3bf16_v2bf16__2_3_3(ptr addrspace(1) inreg %ptr) {
19081894; GFX942-NEXT: ;;#ASMSTART
19091895; GFX942-NEXT: ; def v1
19101896; GFX942-NEXT: ;;#ASMEND
1897+ ; GFX942-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:4
19111898; GFX942-NEXT: global_store_dword v0, v1, s[0:1]
1912- ; GFX942-NEXT: v_lshrrev_b32_e32 v1, 16, v1
1913- ; GFX942-NEXT: global_store_short v0, v1, s[0:1] offset:4
19141899; GFX942-NEXT: s_waitcnt vmcnt(0)
19151900; GFX942-NEXT: s_setpc_b64 s[30:31]
19161901 %vec0 = call <2 x bfloat> asm "; def $0" , "=v" ()
0 commit comments