Skip to content

Commit 6d5f87f

Browse files
authored
Revert "DAG: Allow select ptr combine for non-0 address spaces" (#168292)
Reverts #167909
1 parent 1a7cb1e commit 6d5f87f

File tree

12 files changed

+221
-238
lines changed

12 files changed

+221
-238
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -29033,9 +29033,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
2903329033
// over-conservative. It would be beneficial to be able to remember
2903429034
// both potential memory locations. Since we are discarding
2903529035
// src value info, don't do the transformation if the memory
29036-
// locations are not in the same address space.
29037-
LLD->getPointerInfo().getAddrSpace() !=
29038-
RLD->getPointerInfo().getAddrSpace() ||
29036+
// locations are not in the default address space.
29037+
LLD->getPointerInfo().getAddrSpace() != 0 ||
29038+
RLD->getPointerInfo().getAddrSpace() != 0 ||
2903929039
// We can't produce a CMOV of a TargetFrameIndex since we won't
2904029040
// generate the address generation required.
2904129041
LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
@@ -29117,9 +29117,6 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
2911729117
// but the new load must be the minimum (most restrictive) alignment of the
2911829118
// inputs.
2911929119
Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
29120-
unsigned AddrSpace = LLD->getAddressSpace();
29121-
assert(AddrSpace == RLD->getAddressSpace());
29122-
2912329120
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
2912429121
if (!RLD->isInvariant())
2912529122
MMOFlags &= ~MachineMemOperand::MOInvariant;
@@ -29128,16 +29125,15 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
2912829125
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
2912929126
// FIXME: Discards pointer and AA info.
2913029127
Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
29131-
LLD->getChain(), Addr, MachinePointerInfo(AddrSpace),
29132-
Alignment, MMOFlags);
29128+
LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
29129+
MMOFlags);
2913329130
} else {
2913429131
// FIXME: Discards pointer and AA info.
2913529132
Load = DAG.getExtLoad(
2913629133
LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
2913729134
: LLD->getExtensionType(),
2913829135
SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
29139-
MachinePointerInfo(AddrSpace), LLD->getMemoryVT(), Alignment,
29140-
MMOFlags);
29136+
MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
2914129137
}
2914229138

2914329139
// Users of the select now use the result of the load.

llvm/test/CodeGen/AMDGPU/load-select-ptr.ll

Lines changed: 48 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -7,31 +7,27 @@
77
define amdgpu_kernel void @select_ptr_crash_i64_flat(i32 %tmp, [8 x i32], ptr %ptr0, [8 x i32], ptr %ptr1, [8 x i32], ptr addrspace(1) %ptr2) {
88
; GCN-LABEL: select_ptr_crash_i64_flat:
99
; GCN: ; %bb.0:
10+
; GCN-NEXT: s_load_dword s6, s[8:9], 0x0
11+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x28
12+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x50
13+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x78
1014
; GCN-NEXT: s_add_i32 s12, s12, s17
1115
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
12-
; GCN-NEXT: s_load_dword s2, s[8:9], 0x0
13-
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x78
14-
; GCN-NEXT: s_add_u32 s4, s8, 40
15-
; GCN-NEXT: s_addc_u32 s3, s9, 0
16-
; GCN-NEXT: s_add_u32 s5, s8, 0x50
17-
; GCN-NEXT: s_addc_u32 s6, s9, 0
1816
; GCN-NEXT: s_waitcnt lgkmcnt(0)
19-
; GCN-NEXT: s_cmp_eq_u32 s2, 0
20-
; GCN-NEXT: s_cselect_b32 s3, s3, s6
21-
; GCN-NEXT: s_cselect_b32 s2, s4, s5
22-
; GCN-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
17+
; GCN-NEXT: s_cmp_eq_u32 s6, 0
18+
; GCN-NEXT: s_cselect_b32 s0, s0, s2
19+
; GCN-NEXT: s_cselect_b32 s1, s1, s3
20+
; GCN-NEXT: v_mov_b32_e32 v0, s0
21+
; GCN-NEXT: v_mov_b32_e32 v1, s1
22+
; GCN-NEXT: s_add_u32 s0, s0, 4
2323
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
24-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
25-
; GCN-NEXT: v_mov_b32_e32 v0, s2
26-
; GCN-NEXT: v_mov_b32_e32 v1, s3
27-
; GCN-NEXT: s_add_u32 s2, s2, 4
24+
; GCN-NEXT: s_addc_u32 s1, s1, 0
2825
; GCN-NEXT: flat_load_dword v0, v[0:1]
29-
; GCN-NEXT: s_addc_u32 s3, s3, 0
30-
; GCN-NEXT: v_mov_b32_e32 v1, s2
31-
; GCN-NEXT: v_mov_b32_e32 v2, s3
26+
; GCN-NEXT: v_mov_b32_e32 v2, s1
27+
; GCN-NEXT: v_mov_b32_e32 v1, s0
3228
; GCN-NEXT: flat_load_dword v1, v[1:2]
33-
; GCN-NEXT: v_mov_b32_e32 v3, s1
34-
; GCN-NEXT: v_mov_b32_e32 v2, s0
29+
; GCN-NEXT: v_mov_b32_e32 v2, s4
30+
; GCN-NEXT: v_mov_b32_e32 v3, s5
3531
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
3632
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
3733
; GCN-NEXT: s_endpgm
@@ -49,28 +45,25 @@ define amdgpu_kernel void @select_ptr_crash_i64_flat(i32 %tmp, [8 x i32], ptr %p
4945
define amdgpu_kernel void @select_ptr_crash_i64_global(i32 %tmp, [8 x i32], ptr addrspace(1) %ptr0, [8 x i32], ptr addrspace(1) %ptr1, [8 x i32], ptr addrspace(1) %ptr2) {
5046
; GCN-LABEL: select_ptr_crash_i64_global:
5147
; GCN: ; %bb.0:
48+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x28
49+
; GCN-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x50
50+
; GCN-NEXT: s_load_dword s6, s[8:9], 0x0
51+
; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x78
5252
; GCN-NEXT: s_add_i32 s12, s12, s17
5353
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
54-
; GCN-NEXT: s_load_dword s2, s[8:9], 0x0
55-
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x78
56-
; GCN-NEXT: s_add_u32 s4, s8, 40
57-
; GCN-NEXT: s_addc_u32 s3, s9, 0
58-
; GCN-NEXT: s_add_u32 s5, s8, 0x50
59-
; GCN-NEXT: s_addc_u32 s6, s9, 0
6054
; GCN-NEXT: s_waitcnt lgkmcnt(0)
61-
; GCN-NEXT: s_cmp_eq_u32 s2, 0
62-
; GCN-NEXT: s_cselect_b32 s3, s3, s6
63-
; GCN-NEXT: s_cselect_b32 s2, s4, s5
55+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
6456
; GCN-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
65-
; GCN-NEXT: v_mov_b32_e32 v0, s0
57+
; GCN-NEXT: s_cmp_eq_u32 s6, 0
58+
; GCN-NEXT: v_mov_b32_e32 v2, s4
6659
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
67-
; GCN-NEXT: v_mov_b32_e32 v1, s1
68-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
69-
; GCN-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
60+
; GCN-NEXT: v_mov_b32_e32 v3, s5
7061
; GCN-NEXT: s_waitcnt lgkmcnt(0)
71-
; GCN-NEXT: v_mov_b32_e32 v2, s2
72-
; GCN-NEXT: v_mov_b32_e32 v3, s3
73-
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
62+
; GCN-NEXT: s_cselect_b32 s1, s1, s3
63+
; GCN-NEXT: s_cselect_b32 s0, s0, s2
64+
; GCN-NEXT: v_mov_b32_e32 v0, s0
65+
; GCN-NEXT: v_mov_b32_e32 v1, s1
66+
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
7467
; GCN-NEXT: s_endpgm
7568
%tmp2 = icmp eq i32 %tmp, 0
7669
%tmp3 = load i64, ptr addrspace(1) %ptr0, align 8
@@ -85,18 +78,22 @@ define amdgpu_kernel void @select_ptr_crash_i64_local(i32 %tmp, ptr addrspace(3)
8578
; GCN: ; %bb.0:
8679
; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
8780
; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
81+
; GCN-NEXT: s_mov_b32 m0, -1
8882
; GCN-NEXT: s_add_i32 s12, s12, s17
8983
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
90-
; GCN-NEXT: s_mov_b32 m0, -1
9184
; GCN-NEXT: s_waitcnt lgkmcnt(0)
92-
; GCN-NEXT: s_cmp_eq_u32 s0, 0
93-
; GCN-NEXT: s_cselect_b32 s0, s1, s2
94-
; GCN-NEXT: v_mov_b32_e32 v0, s0
85+
; GCN-NEXT: v_mov_b32_e32 v0, s1
86+
; GCN-NEXT: v_mov_b32_e32 v2, s2
9587
; GCN-NEXT: ds_read_b64 v[0:1], v0
96-
; GCN-NEXT: v_mov_b32_e32 v2, s4
88+
; GCN-NEXT: ds_read_b64 v[2:3], v2
89+
; GCN-NEXT: s_cmp_eq_u32 s0, 0
90+
; GCN-NEXT: s_cselect_b64 vcc, -1, 0
9791
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
98-
; GCN-NEXT: v_mov_b32_e32 v3, s5
9992
; GCN-NEXT: s_waitcnt lgkmcnt(0)
93+
; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
94+
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
95+
; GCN-NEXT: v_mov_b32_e32 v2, s4
96+
; GCN-NEXT: v_mov_b32_e32 v3, s5
10097
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
10198
; GCN-NEXT: s_endpgm
10299
%tmp2 = icmp eq i32 %tmp, 0
@@ -115,20 +112,22 @@ define amdgpu_kernel void @select_ptr_crash_i64_local_offsets(i32 %tmp, ptr addr
115112
; GCN: ; %bb.0:
116113
; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
117114
; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
115+
; GCN-NEXT: s_mov_b32 m0, -1
118116
; GCN-NEXT: s_add_i32 s12, s12, s17
119117
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
120-
; GCN-NEXT: s_mov_b32 m0, -1
121118
; GCN-NEXT: s_waitcnt lgkmcnt(0)
122-
; GCN-NEXT: s_addk_i32 s1, 0x80
123-
; GCN-NEXT: s_addk_i32 s2, 0x200
119+
; GCN-NEXT: v_mov_b32_e32 v0, s1
120+
; GCN-NEXT: v_mov_b32_e32 v2, s2
121+
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128
122+
; GCN-NEXT: ds_read_b64 v[2:3], v2 offset:512
124123
; GCN-NEXT: s_cmp_eq_u32 s0, 0
125-
; GCN-NEXT: s_cselect_b32 s0, s1, s2
126-
; GCN-NEXT: v_mov_b32_e32 v0, s0
127-
; GCN-NEXT: ds_read_b64 v[0:1], v0
128-
; GCN-NEXT: v_mov_b32_e32 v2, s4
124+
; GCN-NEXT: s_cselect_b64 vcc, -1, 0
129125
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
130-
; GCN-NEXT: v_mov_b32_e32 v3, s5
131126
; GCN-NEXT: s_waitcnt lgkmcnt(0)
127+
; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
128+
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
129+
; GCN-NEXT: v_mov_b32_e32 v2, s4
130+
; GCN-NEXT: v_mov_b32_e32 v3, s5
132131
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
133132
; GCN-NEXT: s_endpgm
134133
%tmp2 = icmp eq i32 %tmp, 0

llvm/test/CodeGen/AMDGPU/select-load-to-load-select-ptr-combine.ll

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -22,12 +22,12 @@ define i32 @select_load_i32_p1(i1 %cond, ptr addrspace(1) %a, ptr addrspace(1) %
2222
; CHECK-LABEL: select_load_i32_p1:
2323
; CHECK: ; %bb.0:
2424
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25+
; CHECK-NEXT: global_load_dword v5, v[1:2], off
26+
; CHECK-NEXT: global_load_dword v6, v[3:4], off
2527
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
2628
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
27-
; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
28-
; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
29-
; CHECK-NEXT: global_load_dword v0, v[1:2], off
3029
; CHECK-NEXT: s_waitcnt vmcnt(0)
30+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v5, vcc
3131
; CHECK-NEXT: s_setpc_b64 s[30:31]
3232
%ld0 = load i32, ptr addrspace(1) %a
3333
%ld1 = load i32, ptr addrspace(1) %b
@@ -39,11 +39,12 @@ define i32 @select_load_i32_p3(i1 %cond, ptr addrspace(3) %a, ptr addrspace(3) %
3939
; CHECK-LABEL: select_load_i32_p3:
4040
; CHECK: ; %bb.0:
4141
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42+
; CHECK-NEXT: ds_read_b32 v1, v1
43+
; CHECK-NEXT: ds_read_b32 v2, v2
4244
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
4345
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
44-
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
45-
; CHECK-NEXT: ds_read_b32 v0, v0
4646
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
47+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
4748
; CHECK-NEXT: s_setpc_b64 s[30:31]
4849
%ld0 = load i32, ptr addrspace(3) %a
4950
%ld1 = load i32, ptr addrspace(3) %b
@@ -89,12 +90,12 @@ define i8 @select_load_i8_p1(i1 %cond, ptr addrspace(1) %a, ptr addrspace(1) %b)
8990
; CHECK-LABEL: select_load_i8_p1:
9091
; CHECK: ; %bb.0:
9192
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93+
; CHECK-NEXT: global_load_ubyte v5, v[1:2], off
94+
; CHECK-NEXT: global_load_ubyte v6, v[3:4], off
9295
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
9396
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
94-
; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
95-
; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
96-
; CHECK-NEXT: global_load_ubyte v0, v[1:2], off
9797
; CHECK-NEXT: s_waitcnt vmcnt(0)
98+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v5, vcc
9899
; CHECK-NEXT: s_setpc_b64 s[30:31]
99100
%ld0 = load i8, ptr addrspace(1) %a
100101
%ld1 = load i8, ptr addrspace(1) %b
@@ -106,16 +107,12 @@ define i32 @select_load_i32_p1_offset(i1 %cond, ptr addrspace(1) %a, ptr addrspa
106107
; CHECK-LABEL: select_load_i32_p1_offset:
107108
; CHECK: ; %bb.0:
108109
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109-
; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, 0x100, v1
110-
; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v2, vcc
111-
; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, 0x200, v1
110+
; CHECK-NEXT: global_load_dword v3, v[1:2], off offset:256
111+
; CHECK-NEXT: global_load_dword v4, v[1:2], off offset:512
112112
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
113-
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
114113
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
115-
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
116-
; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc
117-
; CHECK-NEXT: global_load_dword v0, v[0:1], off
118114
; CHECK-NEXT: s_waitcnt vmcnt(0)
115+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc
119116
; CHECK-NEXT: s_setpc_b64 s[30:31]
120117
%gep.a = getelementptr i8, ptr addrspace(1) %a, i64 256
121118
%gep.b = getelementptr i8, ptr addrspace(1) %a, i64 512

0 commit comments

Comments
 (0)