Skip to content

Commit c060de7

Browse files
GlobalISel: Fix artifact combine value finder look through copy
Search the instruction found by looking through copies for the COPY's source register, not the COPY's destination register. Differential Revision: https://reviews.llvm.org/D137273
1 parent 74d5c3c commit c060de7

File tree

4 files changed

+147
-180
lines changed

4 files changed

+147
-180
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,10 @@ class LegalizationArtifactCombiner {
724724
/// and its callees rely upon.
725725
Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
726726
unsigned Size) {
727-
MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
727+
Optional<DefinitionAndSourceRegister> DefSrcReg =
728+
getDefSrcRegIgnoringCopies(DefReg, MRI);
729+
MachineInstr *Def = DefSrcReg->MI;
730+
DefReg = DefSrcReg->Reg;
728731
// If the instruction has a single def, then simply delegate the search.
729732
// For unmerge however with multiple defs, we need to compute the offset
730733
// into the source of the unmerge.

llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,3 +252,21 @@ body: |
252252
%7:_(<5 x s32>) = G_BUILD_VECTOR %3, %4, %5, %6, %2
253253
$vgpr5_vgpr6_vgpr7_vgpr8_vgpr9= COPY %7
254254
...
255+
256+
---
257+
name: value_finder_look_through_copy
258+
body: |
259+
bb.0:
260+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
261+
262+
; GFX9-LABEL: name: value_finder_look_through_copy
263+
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
264+
; GFX9-NEXT: {{ $}}
265+
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
266+
; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<2 x s32>)
267+
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
268+
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>)
269+
%3:_(s32) = COPY %1
270+
%4:_(<2 x s32>) = G_BUILD_VECTOR %3, %2
271+
$vgpr2_vgpr3= COPY %4
272+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll

Lines changed: 15 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -968,25 +968,17 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) {
968968
; GFX9-LABEL: extractelement_vgpr_v4i128_idx1:
969969
; GFX9: ; %bb.0:
970970
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971-
; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
971+
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16
972972
; GFX9-NEXT: s_waitcnt vmcnt(0)
973-
; GFX9-NEXT: v_mov_b32_e32 v0, v4
974-
; GFX9-NEXT: v_mov_b32_e32 v1, v5
975-
; GFX9-NEXT: v_mov_b32_e32 v2, v6
976-
; GFX9-NEXT: v_mov_b32_e32 v3, v7
977973
; GFX9-NEXT: s_setpc_b64 s[30:31]
978974
;
979975
; GFX8-LABEL: extractelement_vgpr_v4i128_idx1:
980976
; GFX8: ; %bb.0:
981977
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982978
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
983979
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
984-
; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
980+
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
985981
; GFX8-NEXT: s_waitcnt vmcnt(0)
986-
; GFX8-NEXT: v_mov_b32_e32 v0, v4
987-
; GFX8-NEXT: v_mov_b32_e32 v1, v5
988-
; GFX8-NEXT: v_mov_b32_e32 v2, v6
989-
; GFX8-NEXT: v_mov_b32_e32 v3, v7
990982
; GFX8-NEXT: s_setpc_b64 s[30:31]
991983
;
992984
; GFX7-LABEL: extractelement_vgpr_v4i128_idx1:
@@ -995,34 +987,24 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) {
995987
; GFX7-NEXT: s_mov_b32 s6, 0
996988
; GFX7-NEXT: s_mov_b32 s7, 0xf000
997989
; GFX7-NEXT: s_mov_b64 s[4:5], 0
998-
; GFX7-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
990+
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:16
999991
; GFX7-NEXT: s_waitcnt vmcnt(0)
1000-
; GFX7-NEXT: v_mov_b32_e32 v0, v4
1001-
; GFX7-NEXT: v_mov_b32_e32 v1, v5
1002-
; GFX7-NEXT: v_mov_b32_e32 v2, v6
1003-
; GFX7-NEXT: v_mov_b32_e32 v3, v7
1004992
; GFX7-NEXT: s_setpc_b64 s[30:31]
1005993
;
1006994
; GFX10-LABEL: extractelement_vgpr_v4i128_idx1:
1007995
; GFX10: ; %bb.0:
1008996
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009997
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1010-
; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
998+
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16
1011999
; GFX10-NEXT: s_waitcnt vmcnt(0)
1012-
; GFX10-NEXT: v_mov_b32_e32 v0, v4
1013-
; GFX10-NEXT: v_mov_b32_e32 v1, v5
1014-
; GFX10-NEXT: v_mov_b32_e32 v2, v6
1015-
; GFX10-NEXT: v_mov_b32_e32 v3, v7
10161000
; GFX10-NEXT: s_setpc_b64 s[30:31]
10171001
;
10181002
; GFX11-LABEL: extractelement_vgpr_v4i128_idx1:
10191003
; GFX11: ; %bb.0:
10201004
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10211005
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1022-
; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
1006+
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16
10231007
; GFX11-NEXT: s_waitcnt vmcnt(0)
1024-
; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
1025-
; GFX11-NEXT: v_dual_mov_b32 v2, v6 :: v_dual_mov_b32 v3, v7
10261008
; GFX11-NEXT: s_setpc_b64 s[30:31]
10271009
%vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr
10281010
%element = extractelement <4 x i128> %vector, i32 1
@@ -1033,25 +1015,17 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) {
10331015
; GFX9-LABEL: extractelement_vgpr_v4i128_idx2:
10341016
; GFX9: ; %bb.0:
10351017
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036-
; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
1018+
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32
10371019
; GFX9-NEXT: s_waitcnt vmcnt(0)
1038-
; GFX9-NEXT: v_mov_b32_e32 v0, v8
1039-
; GFX9-NEXT: v_mov_b32_e32 v1, v9
1040-
; GFX9-NEXT: v_mov_b32_e32 v2, v10
1041-
; GFX9-NEXT: v_mov_b32_e32 v3, v11
10421020
; GFX9-NEXT: s_setpc_b64 s[30:31]
10431021
;
10441022
; GFX8-LABEL: extractelement_vgpr_v4i128_idx2:
10451023
; GFX8: ; %bb.0:
10461024
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10471025
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0
10481026
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1049-
; GFX8-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
1027+
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
10501028
; GFX8-NEXT: s_waitcnt vmcnt(0)
1051-
; GFX8-NEXT: v_mov_b32_e32 v0, v8
1052-
; GFX8-NEXT: v_mov_b32_e32 v1, v9
1053-
; GFX8-NEXT: v_mov_b32_e32 v2, v10
1054-
; GFX8-NEXT: v_mov_b32_e32 v3, v11
10551029
; GFX8-NEXT: s_setpc_b64 s[30:31]
10561030
;
10571031
; GFX7-LABEL: extractelement_vgpr_v4i128_idx2:
@@ -1060,34 +1034,24 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) {
10601034
; GFX7-NEXT: s_mov_b32 s6, 0
10611035
; GFX7-NEXT: s_mov_b32 s7, 0xf000
10621036
; GFX7-NEXT: s_mov_b64 s[4:5], 0
1063-
; GFX7-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32
1037+
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:32
10641038
; GFX7-NEXT: s_waitcnt vmcnt(0)
1065-
; GFX7-NEXT: v_mov_b32_e32 v0, v8
1066-
; GFX7-NEXT: v_mov_b32_e32 v1, v9
1067-
; GFX7-NEXT: v_mov_b32_e32 v2, v10
1068-
; GFX7-NEXT: v_mov_b32_e32 v3, v11
10691039
; GFX7-NEXT: s_setpc_b64 s[30:31]
10701040
;
10711041
; GFX10-LABEL: extractelement_vgpr_v4i128_idx2:
10721042
; GFX10: ; %bb.0:
10731043
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10741044
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1075-
; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
1045+
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32
10761046
; GFX10-NEXT: s_waitcnt vmcnt(0)
1077-
; GFX10-NEXT: v_mov_b32_e32 v0, v8
1078-
; GFX10-NEXT: v_mov_b32_e32 v1, v9
1079-
; GFX10-NEXT: v_mov_b32_e32 v2, v10
1080-
; GFX10-NEXT: v_mov_b32_e32 v3, v11
10811047
; GFX10-NEXT: s_setpc_b64 s[30:31]
10821048
;
10831049
; GFX11-LABEL: extractelement_vgpr_v4i128_idx2:
10841050
; GFX11: ; %bb.0:
10851051
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10861052
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1087-
; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32
1053+
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32
10881054
; GFX11-NEXT: s_waitcnt vmcnt(0)
1089-
; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
1090-
; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
10911055
; GFX11-NEXT: s_setpc_b64 s[30:31]
10921056
%vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr
10931057
%element = extractelement <4 x i128> %vector, i32 2
@@ -1098,25 +1062,17 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) {
10981062
; GFX9-LABEL: extractelement_vgpr_v4i128_idx3:
10991063
; GFX9: ; %bb.0:
11001064
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101-
; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
1065+
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48
11021066
; GFX9-NEXT: s_waitcnt vmcnt(0)
1103-
; GFX9-NEXT: v_mov_b32_e32 v0, v12
1104-
; GFX9-NEXT: v_mov_b32_e32 v1, v13
1105-
; GFX9-NEXT: v_mov_b32_e32 v2, v14
1106-
; GFX9-NEXT: v_mov_b32_e32 v3, v15
11071067
; GFX9-NEXT: s_setpc_b64 s[30:31]
11081068
;
11091069
; GFX8-LABEL: extractelement_vgpr_v4i128_idx3:
11101070
; GFX8: ; %bb.0:
11111071
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11121072
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, v0
11131073
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1114-
; GFX8-NEXT: flat_load_dwordx4 v[12:15], v[0:1]
1074+
; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
11151075
; GFX8-NEXT: s_waitcnt vmcnt(0)
1116-
; GFX8-NEXT: v_mov_b32_e32 v0, v12
1117-
; GFX8-NEXT: v_mov_b32_e32 v1, v13
1118-
; GFX8-NEXT: v_mov_b32_e32 v2, v14
1119-
; GFX8-NEXT: v_mov_b32_e32 v3, v15
11201076
; GFX8-NEXT: s_setpc_b64 s[30:31]
11211077
;
11221078
; GFX7-LABEL: extractelement_vgpr_v4i128_idx3:
@@ -1125,34 +1081,24 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) {
11251081
; GFX7-NEXT: s_mov_b32 s6, 0
11261082
; GFX7-NEXT: s_mov_b32 s7, 0xf000
11271083
; GFX7-NEXT: s_mov_b64 s[4:5], 0
1128-
; GFX7-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48
1084+
; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:48
11291085
; GFX7-NEXT: s_waitcnt vmcnt(0)
1130-
; GFX7-NEXT: v_mov_b32_e32 v0, v12
1131-
; GFX7-NEXT: v_mov_b32_e32 v1, v13
1132-
; GFX7-NEXT: v_mov_b32_e32 v2, v14
1133-
; GFX7-NEXT: v_mov_b32_e32 v3, v15
11341086
; GFX7-NEXT: s_setpc_b64 s[30:31]
11351087
;
11361088
; GFX10-LABEL: extractelement_vgpr_v4i128_idx3:
11371089
; GFX10: ; %bb.0:
11381090
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11391091
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1140-
; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
1092+
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48
11411093
; GFX10-NEXT: s_waitcnt vmcnt(0)
1142-
; GFX10-NEXT: v_mov_b32_e32 v0, v12
1143-
; GFX10-NEXT: v_mov_b32_e32 v1, v13
1144-
; GFX10-NEXT: v_mov_b32_e32 v2, v14
1145-
; GFX10-NEXT: v_mov_b32_e32 v3, v15
11461094
; GFX10-NEXT: s_setpc_b64 s[30:31]
11471095
;
11481096
; GFX11-LABEL: extractelement_vgpr_v4i128_idx3:
11491097
; GFX11: ; %bb.0:
11501098
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11511099
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1152-
; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48
1100+
; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48
11531101
; GFX11-NEXT: s_waitcnt vmcnt(0)
1154-
; GFX11-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13
1155-
; GFX11-NEXT: v_dual_mov_b32 v2, v14 :: v_dual_mov_b32 v3, v15
11561102
; GFX11-NEXT: s_setpc_b64 s[30:31]
11571103
%vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr
11581104
%element = extractelement <4 x i128> %vector, i32 3

0 commit comments

Comments
 (0)