Skip to content

Commit e79c7c1

Browse files
authored
AMDGPU: Handle invariant loads when considering if a load can be scalar (llvm#168787)
1 parent 62deee4 commit e79c7c1

File tree

2 files changed

+16
-2
lines changed

2 files changed

+16
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4437,7 +4437,8 @@ bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
44374437
Ld->getAlign() >=
44384438
Align(std::min(MMO->getSize().getValue().getKnownMinValue(),
44394439
uint64_t(4))) &&
4440-
((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
4440+
(MMO->isInvariant() ||
4441+
(Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
44414442
Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
44424443
(Subtarget->getScalarizeGlobalBehavior() &&
44434444
Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&

llvm/test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
; GCN-DAG: buffer_load_dwordx2 [[PTR:v\[[0-9]+:[0-9]+\]]],
1111
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
1212
; GCN: buffer_store_dword [[K]], [[PTR]]
13-
define amdgpu_kernel void @test_merge_store_constant_i16_invariant_global_pointer_load(ptr addrspace(1) dereferenceable(4096) nonnull %in) #0 {
13+
define void @test_merge_store_constant_i16_invariant_global_pointer_load(ptr addrspace(1) dereferenceable(4096) nonnull %in) #0 {
1414
%ptr = load ptr addrspace(1), ptr addrspace(1) %in, !invariant.load !0
1515
%ptr.1 = getelementptr i16, ptr addrspace(1) %ptr, i64 1
1616
store i16 123, ptr addrspace(1) %ptr, align 4
@@ -30,6 +30,19 @@ define amdgpu_kernel void @test_merge_store_constant_i16_invariant_constant_poin
3030
ret void
3131
}
3232

33+
; An invariant global load should be handled the same as a constant-address-space load.
34+
; GCN-LABEL: {{^}}test_merge_store_global_i16_invariant_uniform_global_pointer_load:
35+
; GCN: s_load_dwordx2 s[[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]]
36+
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
37+
; GCN: buffer_store_dword [[K]], off, s[[[SPTR_LO]]:
38+
define amdgpu_kernel void @test_merge_store_global_i16_invariant_uniform_global_pointer_load(ptr addrspace(1) dereferenceable(4096) nonnull %in) #0 {
39+
%ptr = load ptr addrspace(1), ptr addrspace(1) %in, !invariant.load !0
40+
%ptr.1 = getelementptr i16, ptr addrspace(1) %ptr, i64 1
41+
store i16 123, ptr addrspace(1) %ptr, align 4
42+
store i16 456, ptr addrspace(1) %ptr.1
43+
ret void
44+
}
45+
3346
!0 = !{}
3447

3548
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)