@@ -968,25 +968,17 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) {
968968; GFX9-LABEL: extractelement_vgpr_v4i128_idx1:
969969; GFX9: ; %bb.0:
970970; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971- ; GFX9-NEXT: global_load_dwordx4 v[4:7 ], v[0:1], off offset:16
971+ ; GFX9-NEXT: global_load_dwordx4 v[0:3 ], v[0:1], off offset:16
972972; GFX9-NEXT: s_waitcnt vmcnt(0)
973- ; GFX9-NEXT: v_mov_b32_e32 v0, v4
974- ; GFX9-NEXT: v_mov_b32_e32 v1, v5
975- ; GFX9-NEXT: v_mov_b32_e32 v2, v6
976- ; GFX9-NEXT: v_mov_b32_e32 v3, v7
977973; GFX9-NEXT: s_setpc_b64 s[30:31]
978974;
979975; GFX8-LABEL: extractelement_vgpr_v4i128_idx1:
980976; GFX8: ; %bb.0:
981977; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982978; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
983979; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
984- ; GFX8-NEXT: flat_load_dwordx4 v[4:7 ], v[0:1]
980+ ; GFX8-NEXT: flat_load_dwordx4 v[0:3 ], v[0:1]
985981; GFX8-NEXT: s_waitcnt vmcnt(0)
986- ; GFX8-NEXT: v_mov_b32_e32 v0, v4
987- ; GFX8-NEXT: v_mov_b32_e32 v1, v5
988- ; GFX8-NEXT: v_mov_b32_e32 v2, v6
989- ; GFX8-NEXT: v_mov_b32_e32 v3, v7
990982; GFX8-NEXT: s_setpc_b64 s[30:31]
991983;
992984; GFX7-LABEL: extractelement_vgpr_v4i128_idx1:
@@ -995,34 +987,24 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) {
995987; GFX7-NEXT: s_mov_b32 s6, 0
996988; GFX7-NEXT: s_mov_b32 s7, 0xf000
997989; GFX7-NEXT: s_mov_b64 s[4:5], 0
998- ; GFX7-NEXT: buffer_load_dwordx4 v[4:7 ], v[0:1], s[4:7], 0 addr64 offset:16
990+ ; GFX7-NEXT: buffer_load_dwordx4 v[0:3 ], v[0:1], s[4:7], 0 addr64 offset:16
999991; GFX7-NEXT: s_waitcnt vmcnt(0)
1000- ; GFX7-NEXT: v_mov_b32_e32 v0, v4
1001- ; GFX7-NEXT: v_mov_b32_e32 v1, v5
1002- ; GFX7-NEXT: v_mov_b32_e32 v2, v6
1003- ; GFX7-NEXT: v_mov_b32_e32 v3, v7
1004992; GFX7-NEXT: s_setpc_b64 s[30:31]
1005993;
1006994; GFX10-LABEL: extractelement_vgpr_v4i128_idx1:
1007995; GFX10: ; %bb.0:
1008996; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009997; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1010- ; GFX10-NEXT: global_load_dwordx4 v[4:7 ], v[0:1], off offset:16
998+ ; GFX10-NEXT: global_load_dwordx4 v[0:3 ], v[0:1], off offset:16
1011999; GFX10-NEXT: s_waitcnt vmcnt(0)
1012- ; GFX10-NEXT: v_mov_b32_e32 v0, v4
1013- ; GFX10-NEXT: v_mov_b32_e32 v1, v5
1014- ; GFX10-NEXT: v_mov_b32_e32 v2, v6
1015- ; GFX10-NEXT: v_mov_b32_e32 v3, v7
10161000; GFX10-NEXT: s_setpc_b64 s[30:31]
10171001;
10181002; GFX11-LABEL: extractelement_vgpr_v4i128_idx1:
10191003; GFX11: ; %bb.0:
10201004; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10211005; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1022- ; GFX11-NEXT: global_load_b128 v[4:7 ], v[0:1], off offset:16
1006+ ; GFX11-NEXT: global_load_b128 v[0:3 ], v[0:1], off offset:16
10231007; GFX11-NEXT: s_waitcnt vmcnt(0)
1024- ; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
1025- ; GFX11-NEXT: v_dual_mov_b32 v2, v6 :: v_dual_mov_b32 v3, v7
10261008; GFX11-NEXT: s_setpc_b64 s[30:31]
10271009 %vector = load <4 x i128 >, <4 x i128 > addrspace (1 )* %ptr
10281010 %element = extractelement <4 x i128 > %vector , i32 1
@@ -1033,25 +1015,17 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) {
10331015; GFX9-LABEL: extractelement_vgpr_v4i128_idx2:
10341016; GFX9: ; %bb.0:
10351017; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036- ; GFX9-NEXT: global_load_dwordx4 v[8:11 ], v[0:1], off offset:32
1018+ ; GFX9-NEXT: global_load_dwordx4 v[0:3 ], v[0:1], off offset:32
10371019; GFX9-NEXT: s_waitcnt vmcnt(0)
1038- ; GFX9-NEXT: v_mov_b32_e32 v0, v8
1039- ; GFX9-NEXT: v_mov_b32_e32 v1, v9
1040- ; GFX9-NEXT: v_mov_b32_e32 v2, v10
1041- ; GFX9-NEXT: v_mov_b32_e32 v3, v11
10421020; GFX9-NEXT: s_setpc_b64 s[30:31]
10431021;
10441022; GFX8-LABEL: extractelement_vgpr_v4i128_idx2:
10451023; GFX8: ; %bb.0:
10461024; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10471025; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0
10481026; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1049- ; GFX8-NEXT: flat_load_dwordx4 v[8:11 ], v[0:1]
1027+ ; GFX8-NEXT: flat_load_dwordx4 v[0:3 ], v[0:1]
10501028; GFX8-NEXT: s_waitcnt vmcnt(0)
1051- ; GFX8-NEXT: v_mov_b32_e32 v0, v8
1052- ; GFX8-NEXT: v_mov_b32_e32 v1, v9
1053- ; GFX8-NEXT: v_mov_b32_e32 v2, v10
1054- ; GFX8-NEXT: v_mov_b32_e32 v3, v11
10551029; GFX8-NEXT: s_setpc_b64 s[30:31]
10561030;
10571031; GFX7-LABEL: extractelement_vgpr_v4i128_idx2:
@@ -1060,34 +1034,24 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) {
10601034; GFX7-NEXT: s_mov_b32 s6, 0
10611035; GFX7-NEXT: s_mov_b32 s7, 0xf000
10621036; GFX7-NEXT: s_mov_b64 s[4:5], 0
1063- ; GFX7-NEXT: buffer_load_dwordx4 v[8:11 ], v[0:1], s[4:7], 0 addr64 offset:32
1037+ ; GFX7-NEXT: buffer_load_dwordx4 v[0:3 ], v[0:1], s[4:7], 0 addr64 offset:32
10641038; GFX7-NEXT: s_waitcnt vmcnt(0)
1065- ; GFX7-NEXT: v_mov_b32_e32 v0, v8
1066- ; GFX7-NEXT: v_mov_b32_e32 v1, v9
1067- ; GFX7-NEXT: v_mov_b32_e32 v2, v10
1068- ; GFX7-NEXT: v_mov_b32_e32 v3, v11
10691039; GFX7-NEXT: s_setpc_b64 s[30:31]
10701040;
10711041; GFX10-LABEL: extractelement_vgpr_v4i128_idx2:
10721042; GFX10: ; %bb.0:
10731043; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10741044; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1075- ; GFX10-NEXT: global_load_dwordx4 v[8:11 ], v[0:1], off offset:32
1045+ ; GFX10-NEXT: global_load_dwordx4 v[0:3 ], v[0:1], off offset:32
10761046; GFX10-NEXT: s_waitcnt vmcnt(0)
1077- ; GFX10-NEXT: v_mov_b32_e32 v0, v8
1078- ; GFX10-NEXT: v_mov_b32_e32 v1, v9
1079- ; GFX10-NEXT: v_mov_b32_e32 v2, v10
1080- ; GFX10-NEXT: v_mov_b32_e32 v3, v11
10811047; GFX10-NEXT: s_setpc_b64 s[30:31]
10821048;
10831049; GFX11-LABEL: extractelement_vgpr_v4i128_idx2:
10841050; GFX11: ; %bb.0:
10851051; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10861052; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1087- ; GFX11-NEXT: global_load_b128 v[8:11 ], v[0:1], off offset:32
1053+ ; GFX11-NEXT: global_load_b128 v[0:3 ], v[0:1], off offset:32
10881054; GFX11-NEXT: s_waitcnt vmcnt(0)
1089- ; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
1090- ; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
10911055; GFX11-NEXT: s_setpc_b64 s[30:31]
10921056 %vector = load <4 x i128 >, <4 x i128 > addrspace (1 )* %ptr
10931057 %element = extractelement <4 x i128 > %vector , i32 2
@@ -1098,25 +1062,17 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) {
10981062; GFX9-LABEL: extractelement_vgpr_v4i128_idx3:
10991063; GFX9: ; %bb.0:
11001064; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101- ; GFX9-NEXT: global_load_dwordx4 v[12:15 ], v[0:1], off offset:48
1065+ ; GFX9-NEXT: global_load_dwordx4 v[0:3 ], v[0:1], off offset:48
11021066; GFX9-NEXT: s_waitcnt vmcnt(0)
1103- ; GFX9-NEXT: v_mov_b32_e32 v0, v12
1104- ; GFX9-NEXT: v_mov_b32_e32 v1, v13
1105- ; GFX9-NEXT: v_mov_b32_e32 v2, v14
1106- ; GFX9-NEXT: v_mov_b32_e32 v3, v15
11071067; GFX9-NEXT: s_setpc_b64 s[30:31]
11081068;
11091069; GFX8-LABEL: extractelement_vgpr_v4i128_idx3:
11101070; GFX8: ; %bb.0:
11111071; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11121072; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, v0
11131073; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1114- ; GFX8-NEXT: flat_load_dwordx4 v[12:15 ], v[0:1]
1074+ ; GFX8-NEXT: flat_load_dwordx4 v[0:3 ], v[0:1]
11151075; GFX8-NEXT: s_waitcnt vmcnt(0)
1116- ; GFX8-NEXT: v_mov_b32_e32 v0, v12
1117- ; GFX8-NEXT: v_mov_b32_e32 v1, v13
1118- ; GFX8-NEXT: v_mov_b32_e32 v2, v14
1119- ; GFX8-NEXT: v_mov_b32_e32 v3, v15
11201076; GFX8-NEXT: s_setpc_b64 s[30:31]
11211077;
11221078; GFX7-LABEL: extractelement_vgpr_v4i128_idx3:
@@ -1125,34 +1081,24 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) {
11251081; GFX7-NEXT: s_mov_b32 s6, 0
11261082; GFX7-NEXT: s_mov_b32 s7, 0xf000
11271083; GFX7-NEXT: s_mov_b64 s[4:5], 0
1128- ; GFX7-NEXT: buffer_load_dwordx4 v[12:15 ], v[0:1], s[4:7], 0 addr64 offset:48
1084+ ; GFX7-NEXT: buffer_load_dwordx4 v[0:3 ], v[0:1], s[4:7], 0 addr64 offset:48
11291085; GFX7-NEXT: s_waitcnt vmcnt(0)
1130- ; GFX7-NEXT: v_mov_b32_e32 v0, v12
1131- ; GFX7-NEXT: v_mov_b32_e32 v1, v13
1132- ; GFX7-NEXT: v_mov_b32_e32 v2, v14
1133- ; GFX7-NEXT: v_mov_b32_e32 v3, v15
11341086; GFX7-NEXT: s_setpc_b64 s[30:31]
11351087;
11361088; GFX10-LABEL: extractelement_vgpr_v4i128_idx3:
11371089; GFX10: ; %bb.0:
11381090; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11391091; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1140- ; GFX10-NEXT: global_load_dwordx4 v[12:15 ], v[0:1], off offset:48
1092+ ; GFX10-NEXT: global_load_dwordx4 v[0:3 ], v[0:1], off offset:48
11411093; GFX10-NEXT: s_waitcnt vmcnt(0)
1142- ; GFX10-NEXT: v_mov_b32_e32 v0, v12
1143- ; GFX10-NEXT: v_mov_b32_e32 v1, v13
1144- ; GFX10-NEXT: v_mov_b32_e32 v2, v14
1145- ; GFX10-NEXT: v_mov_b32_e32 v3, v15
11461094; GFX10-NEXT: s_setpc_b64 s[30:31]
11471095;
11481096; GFX11-LABEL: extractelement_vgpr_v4i128_idx3:
11491097; GFX11: ; %bb.0:
11501098; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11511099; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1152- ; GFX11-NEXT: global_load_b128 v[12:15 ], v[0:1], off offset:48
1100+ ; GFX11-NEXT: global_load_b128 v[0:3 ], v[0:1], off offset:48
11531101; GFX11-NEXT: s_waitcnt vmcnt(0)
1154- ; GFX11-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13
1155- ; GFX11-NEXT: v_dual_mov_b32 v2, v14 :: v_dual_mov_b32 v3, v15
11561102; GFX11-NEXT: s_setpc_b64 s[30:31]
11571103 %vector = load <4 x i128 >, <4 x i128 > addrspace (1 )* %ptr
11581104 %element = extractelement <4 x i128 > %vector , i32 3
0 commit comments