Skip to content

Commit deb7416

Browse files
committed
[AMDGPU] Add support for store to constant address space
Since we don't treat stores to the constant address space as IR verifier errors, we need to support their lowering. This PR supports that by treating such stores as no-ops: in the combiner, the store node is simply replaced with its chain. Fixes SWDEV-499366.
1 parent ae7e1b8 commit deb7416

File tree

3 files changed

+82
-12
lines changed

3 files changed

+82
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3910,10 +3910,14 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
39103910
// type.
39113911
SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
39123912
DAGCombinerInfo &DCI) const {
3913+
StoreSDNode *SN = cast<StoreSDNode>(N);
3914+
unsigned AS = SN->getAddressSpace();
3915+
if (AMDGPU::isConstantAddressSpace(AS))
3916+
return SN->getChain();
3917+
39133918
if (!DCI.isBeforeLegalize())
39143919
return SDValue();
39153920

3916-
StoreSDNode *SN = cast<StoreSDNode>(N);
39173921
if (!SN->isSimple() || !ISD::isNormalStore(SN))
39183922
return SDValue();
39193923

@@ -3925,7 +3929,6 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
39253929
Align Alignment = SN->getAlign();
39263930
if (Alignment < Size && isTypeLegal(VT)) {
39273931
unsigned IsFast;
3928-
unsigned AS = SN->getAddressSpace();
39293932

39303933
// Expand unaligned stores earlier than legalization. Due to visitation
39313934
// order problems during legalization, the emitted instructions to pack and

llvm/test/CodeGen/AMDGPU/store-to-constant-error.ll

Lines changed: 0 additions & 10 deletions
This file was deleted.
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
3+
4+
define amdgpu_kernel void @store_as4(ptr addrspace(4) %out, i32 %a, i32 %b) {
5+
; CHECK-LABEL: store_as4:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_endpgm
8+
%r = add i32 %a, %b
9+
store i32 %r, ptr addrspace(4) %out
10+
ret void
11+
}
12+
13+
define amdgpu_kernel void @memset_as4(ptr addrspace(4) %dst) {
14+
; CHECK-LABEL: memset_as4:
15+
; CHECK: ; %bb.0:
16+
; CHECK-NEXT: s_endpgm
17+
call void @llvm.memset.p4.i64(ptr addrspace(4) %dst, i8 0, i64 256, i1 false)
18+
ret void
19+
}
20+
21+
define amdgpu_kernel void @memcpy_to_as4(ptr addrspace(4) %dst, ptr %src) {
22+
; CHECK-LABEL: memcpy_to_as4:
23+
; CHECK: ; %bb.0:
24+
; CHECK-NEXT: s_endpgm
25+
call void @llvm.memcpy.p4.p0.i32(ptr addrspace(4) %dst, ptr %src, i32 256, i1 false)
26+
ret void
27+
}
28+
29+
define amdgpu_kernel void @store_as6(ptr addrspace(6) %out, i32 %a, i32 %b) {
30+
; CHECK-LABEL: store_as6:
31+
; CHECK: ; %bb.0:
32+
; CHECK-NEXT: s_endpgm
33+
%r = add i32 %a, %b
34+
store i32 %r, ptr addrspace(6) %out
35+
ret void
36+
}
37+
38+
define amdgpu_kernel void @memset_as6(ptr addrspace(6) %dst) {
39+
; CHECK-LABEL: memset_as6:
40+
; CHECK: ; %bb.0:
41+
; CHECK-NEXT: s_endpgm
42+
call void @llvm.memset.p6.i64(ptr addrspace(6) %dst, i8 0, i64 256, i1 false)
43+
ret void
44+
}
45+
46+
define amdgpu_kernel void @memcpy_to_as6(ptr addrspace(6) %dst, ptr %src) {
47+
; CHECK-LABEL: memcpy_to_as6:
48+
; CHECK: ; %bb.0:
49+
; CHECK-NEXT: s_endpgm
50+
call void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) %dst, ptr %src, i32 256, i1 false)
51+
ret void
52+
}
53+
54+
; define amdgpu_kernel void @cmpxchg_to_as4(ptr addrspace(4) %dst, i32 %src) {
55+
; %void = cmpxchg ptr addrspace(4) %dst, i32 0, i32 %src seq_cst monotonic
56+
; ret void
57+
; }
58+
59+
; define amdgpu_kernel void @atomicrmw_to_as4(ptr addrspace(4) %dst, i32 %src) {
60+
; %void = atomicrmw add ptr addrspace(4) %dst, i32 %src acquire
61+
; ret void
62+
; }
63+
64+
; define amdgpu_kernel void @cmpxchg_to_as6(ptr addrspace(6) %dst, i32 %src) {
65+
; %void = cmpxchg ptr addrspace(6) %dst, i32 0, i32 %src seq_cst monotonic
66+
; ret void
67+
; }
68+
69+
; define amdgpu_kernel void @atomicrmw_to_as6(ptr addrspace(6) %dst, i32 %src) {
70+
; %void = atomicrmw add ptr addrspace(6) %dst, i32 %src acquire
71+
; ret void
72+
; }
73+
74+
declare void @llvm.memset.p4.i64(ptr addrspace(4) noalias nocapture writeonly, i8, i64, i1)
75+
declare void @llvm.memset.p6.i64(ptr addrspace(6) noalias nocapture writeonly, i8, i64, i1)
76+
declare void @llvm.memcpy.p4.p0.i32(ptr addrspace(4) noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1)
77+
declare void @llvm.memcpy.p6.p0.i32(ptr addrspace(6) noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1)

0 commit comments

Comments
 (0)