Skip to content

Commit 6184ef1

Browse files
changpeng and Konstantin Zhuravlyov authored
[AMDGPU] Support f64 atomics on gfx1250 (llvm#151172)
- BUF/FLAT/GLOBAL_ADD/MIN/MAX_F64
- DS_ADD_F64

Co-authored-by: Konstantin Zhuravlyov <Konstantin [email protected]>
1 parent 330a7e1 commit 6184ef1

14 files changed

+2397 -4 lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2429,7 +2429,7 @@ def HasAtomicFMinFMaxF64FlatInsts :
24292429

24302430
def HasLdsAtomicAddF64 :
24312431
Predicate<"Subtarget->hasLdsAtomicAddF64()">,
2432-
AssemblerPredicate<(any_of FeatureGFX90AInsts)>;
2432+
AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX1250Insts)>;
24332433

24342434
def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
24352435
AssemblerPredicate<(all_of FeatureFlatGlobalInsts)>;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1682,7 +1682,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
16821682
if (ST.hasFlatAtomicFaddF32Inst())
16831683
Atomic.legalFor({{S32, FlatPtr}});
16841684

1685-
if (ST.hasGFX90AInsts()) {
1685+
if (ST.hasGFX90AInsts() || ST.hasGFX1250Insts()) {
16861686
// These are legal with some caveats, and should have undergone expansion in
16871687
// the IR in most situations
16881688
// TODO: Move atomic expansion into legalizer

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2489,7 +2489,7 @@ multiclass VBUFFER_MTBUF_Real_gfx12<bits<4> op, string real_name> {
24892489
}
24902490

24912491
//===----------------------------------------------------------------------===//
2492-
// MUBUF - GFX11, GFX12.
2492+
// MUBUF - GFX11, GFX12, GFX1250.
24932493
//===----------------------------------------------------------------------===//
24942494

24952495
// gfx11 instruction that accept both old and new assembler name.
@@ -2600,6 +2600,12 @@ multiclass MUBUF_Real_Atomic_gfx11_gfx12<bits<8> op,
26002600
def : Mnem_gfx12<gfx11_name, gfx12_name>;
26012601
}
26022602

2603+
multiclass MUBUF_Real_Atomic_gfx12_Renamed<bits<8> op, string real_name> :
2604+
MUBUF_Real_Atomic_gfx12_impl<op, 0, real_name>,
2605+
MUBUF_Real_Atomic_gfx12_impl<op, 1, real_name> {
2606+
def : Mnem_gfx12<get_BUF_ps<NAME>.Mnemonic, real_name>;
2607+
}
2608+
26032609
defm BUFFER_GL0_INV : MUBUF_Real_gfx11<0x02B>;
26042610
defm BUFFER_GL1_INV : MUBUF_Real_gfx11<0x02C>;
26052611

@@ -2678,6 +2684,10 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_gfx11_gfx12<0x04B, "buffer
26782684
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_gfx12<0x059>;
26792685
defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Real_Atomic_gfx12<0x05a>;
26802686

2687+
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_gfx12<0x055>;
2688+
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05b, "buffer_atomic_min_num_f64">;
2689+
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Real_Atomic_gfx12_Renamed<0x05c, "buffer_atomic_max_num_f64">;
2690+
26812691
//===----------------------------------------------------------------------===//
26822692
// MUBUF - GFX10.
26832693
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1397,6 +1397,9 @@ defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12<0x0e0,
13971397
defm DS_BVH_STACK_PUSH8_POP1_RTN_B32 : DS_Real_gfx12<0x0e1>;
13981398
defm DS_BVH_STACK_PUSH8_POP2_RTN_B64 : DS_Real_gfx12<0x0e2>;
13991399

1400+
defm DS_ADD_F64 : DS_Real_gfx12<0x054>;
1401+
defm DS_ADD_RTN_F64 : DS_Real_gfx12<0x074>;
1402+
14001403
let AssemblerPredicate = HasLdsBarrierArriveAtomic in {
14011404
defm DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 : DS_Real_gfx12<0x056>;
14021405
defm DS_ATOMIC_BARRIER_ARRIVE_RTN_B64 : DS_Real_gfx12<0x075>;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3488,6 +3488,14 @@ defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "globa
34883488
defm GLOBAL_LOAD_TR4_B64 : VFLAT_Real_AllAddr_gfx1250<0x073>;
34893489
defm GLOBAL_LOAD_TR6_B96 : VFLAT_Real_AllAddr_gfx1250<0x074>;
34903490

3491+
defm FLAT_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>;
3492+
defm FLAT_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "flat_atomic_min_num_f64">;
3493+
defm FLAT_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "flat_atomic_max_num_f64">;
3494+
3495+
defm GLOBAL_ATOMIC_ADD_F64 : VFLAT_Real_Atomics_gfx1250<0x055>;
3496+
defm GLOBAL_ATOMIC_MIN_F64 : VFLAT_Real_Atomics_gfx1250<0x05b, "global_atomic_min_num_f64">;
3497+
defm GLOBAL_ATOMIC_MAX_F64 : VFLAT_Real_Atomics_gfx1250<0x05c, "global_atomic_max_num_f64">;
3498+
34913499
def True16D16Table : GenericTable {
34923500
let FilterClass = "True16D16Table";
34933501
let CppTypeName = "True16D16Info";

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -715,7 +715,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
715715
bool hasVINTERPEncoding() const { return GFX11Insts && !hasGFX1250Insts(); }
716716

717717
// DS_ADD_F64/DS_ADD_RTN_F64
718-
bool hasLdsAtomicAddF64() const { return hasGFX90AInsts(); }
718+
bool hasLdsAtomicAddF64() const {
719+
return hasGFX90AInsts() || hasGFX1250Insts();
720+
}
719721

720722
bool hasMultiDwordFlatScratchAddressing() const {
721723
return getGeneration() >= GFX9;

0 commit comments

Comments
 (0)