Skip to content

Commit a406bd2

Browse files
committed
AMDGPU: Handle invariant when lowering global loads
A global address space load marked invariant should be treated identically to a constant address space load.
1 parent dd3f339 commit a406bd2

File tree

2 files changed

+16
-9
lines changed

2 files changed

+16
-9
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11944,7 +11944,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1194411944
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
1194511945
(AS == AMDGPUAS::GLOBAL_ADDRESS &&
1194611946
Subtarget->getScalarizeGlobalBehavior() && Load->isSimple() &&
11947-
isMemOpHasNoClobberedMemOperand(Load))) {
11947+
(Load->isInvariant() || isMemOpHasNoClobberedMemOperand(Load)))) {
1194811948
if ((!Op->isDivergent() || AMDGPU::isUniformMMO(MMO)) &&
1194911949
Alignment >= Align(4) && NumElements < 32) {
1195011950
if (MemVT.isPow2VectorType() ||

llvm/test/CodeGen/AMDGPU/load-global-invariant.ll

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -50,15 +50,22 @@ define amdgpu_kernel void @load_global_v3i64(ptr addrspace(1) %dst, ptr addrspac
5050
define amdgpu_kernel void @load_global_v3i64_invariant(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
5151
; CHECK-LABEL: load_global_v3i64_invariant:
5252
; CHECK: ; %bb.0:
53-
; CHECK-NEXT: v_mov_b32_e32 v6, 0
54-
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
55-
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8
53+
; CHECK-NEXT: v_mov_b32_e32 v4, 0
54+
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
55+
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8
5656
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
57-
; CHECK-NEXT: global_load_dwordx4 v[0:3], v6, s[2:3]
58-
; CHECK-NEXT: global_load_dwordx2 v[4:5], v6, s[2:3] offset:16
59-
; CHECK-NEXT: s_waitcnt vmcnt(0)
60-
; CHECK-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
61-
; CHECK-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
57+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
58+
; CHECK-NEXT: s_nop 0
59+
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x10
60+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
61+
; CHECK-NEXT: v_mov_b32_e32 v0, s6
62+
; CHECK-NEXT: v_mov_b32_e32 v1, s7
63+
; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] offset:16
64+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
65+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
66+
; CHECK-NEXT: v_mov_b32_e32 v2, s2
67+
; CHECK-NEXT: v_mov_b32_e32 v3, s3
68+
; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
6269
; CHECK-NEXT: s_endpgm
6370
%ld = load <3 x i64>, ptr addrspace(1) %src, align 32, !invariant.load !0
6471
store <3 x i64> %ld, ptr addrspace(1) %dst, align 32

0 commit comments

Comments
 (0)