Skip to content

Commit c2e8ead

Browse files
committed
Changes from review feedback.
1 parent d05b07a commit c2e8ead

14 files changed: +749 / -688 lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 13 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -18488,22 +18488,6 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1848818488
case AtomicRMWInst::Add:
1848918489
// PCIe supports add and xchg for system atomics.
1849018490
return atomicSupportedIfLegalIntType(RMW);
18491-
case AtomicRMWInst::USubCond:
18492-
if (Subtarget->hasCondSubInsts()) {
18493-
if (auto *IT = dyn_cast<IntegerType>(RMW->getType())) {
18494-
if (IT->getBitWidth() == 32)
18495-
return TargetLowering::AtomicExpansionKind::None;
18496-
}
18497-
}
18498-
return TargetLowering::AtomicExpansionKind::CmpXChg;
18499-
case AtomicRMWInst::USubSat:
18500-
if (Subtarget->hasSubClampInsts()) {
18501-
if (auto *IT = dyn_cast<IntegerType>(RMW->getType())) {
18502-
if (IT->getBitWidth() == 32)
18503-
return TargetLowering::AtomicExpansionKind::None;
18504-
}
18505-
}
18506-
return TargetLowering::AtomicExpansionKind::CmpXChg;
1850718491
case AtomicRMWInst::Sub:
1850818492
case AtomicRMWInst::And:
1850918493
case AtomicRMWInst::Or:
@@ -18513,7 +18497,19 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1851318497
case AtomicRMWInst::UMax:
1851418498
case AtomicRMWInst::UMin:
1851518499
case AtomicRMWInst::UIncWrap:
18516-
case AtomicRMWInst::UDecWrap: {
18500+
case AtomicRMWInst::UDecWrap:
18501+
case AtomicRMWInst::USubCond:
18502+
case AtomicRMWInst::USubSat: {
18503+
if (Op == AtomicRMWInst::USubCond && !Subtarget->hasCondSubInsts())
18504+
return AtomicExpansionKind::CmpXChg;
18505+
if (Op == AtomicRMWInst::USubSat && !Subtarget->hasSubClampInsts())
18506+
return AtomicExpansionKind::CmpXChg;
18507+
if (Op == AtomicRMWInst::USubCond || Op == AtomicRMWInst::USubSat) {
18508+
auto *IT = dyn_cast<IntegerType>(RMW->getType());
18509+
if (!IT || IT->getBitWidth() != 32)
18510+
return AtomicExpansionKind::CmpXChg;
18511+
}
18512+
1851718513
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
1851818514
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
1851918515
if (Subtarget->hasEmulatedSystemScopeAtomics())

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ define i32 @global_atomic_csub(ptr addrspace(1) %ptr, i32 %data) {
3939
; GFX12-NEXT: global_inv scope:SCOPE_SYS
4040
; GFX12-NEXT: s_wait_loadcnt 0x0
4141
; GFX12-NEXT: s_setpc_b64 s[30:31]
42-
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data seq_cst
42+
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data seq_cst, !amdgpu.no.remote.memory !0
4343
ret i32 %ret
4444
}
4545

@@ -84,7 +84,7 @@ define i32 @global_atomic_csub_offset(ptr addrspace(1) %ptr, i32 %data) {
8484
; GFX12-NEXT: s_wait_loadcnt 0x0
8585
; GFX12-NEXT: s_setpc_b64 s[30:31]
8686
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
87-
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst
87+
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst, !amdgpu.no.remote.memory !0
8888
ret i32 %ret
8989
}
9090

@@ -123,7 +123,7 @@ define void @global_atomic_csub_nortn(ptr addrspace(1) %ptr, i32 %data) {
123123
; GFX12-NEXT: global_inv scope:SCOPE_SYS
124124
; GFX12-NEXT: s_wait_loadcnt 0x0
125125
; GFX12-NEXT: s_setpc_b64 s[30:31]
126-
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data seq_cst
126+
%ret = atomicrmw usub_sat ptr addrspace(1) %ptr, i32 %data seq_cst, !amdgpu.no.remote.memory !0
127127
ret void
128128
}
129129

@@ -168,7 +168,7 @@ define void @global_atomic_csub_offset_nortn(ptr addrspace(1) %ptr, i32 %data) {
168168
; GFX12-NEXT: s_wait_loadcnt 0x0
169169
; GFX12-NEXT: s_setpc_b64 s[30:31]
170170
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
171-
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst
171+
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst, !amdgpu.no.remote.memory !0
172172
ret void
173173
}
174174

@@ -215,7 +215,7 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(ptr addrspace(1)
215215
; GFX12-NEXT: global_store_b32 v[0:1], v0, off
216216
; GFX12-NEXT: s_endpgm
217217
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
218-
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst
218+
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst, !amdgpu.no.remote.memory !0
219219
store i32 %ret, ptr addrspace(1) poison
220220
ret void
221221
}
@@ -260,9 +260,11 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(ptr addrspa
260260
; GFX12-NEXT: global_inv scope:SCOPE_SYS
261261
; GFX12-NEXT: s_endpgm
262262
%gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024
263-
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst
263+
%ret = atomicrmw usub_sat ptr addrspace(1) %gep, i32 %data seq_cst, !amdgpu.no.remote.memory !0
264264
ret void
265265
}
266266

267267
attributes #0 = { nounwind willreturn }
268268
attributes #1 = { argmemonly nounwind }
269+
270+
!0 = !{}

0 commit comments

Comments
 (0)