Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,13 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16
TARGET_BUILTIN(__builtin_amdgcn_flat_prefetch, "vvC*0Ii", "nc", "vmem-pref-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_prefetch, "vvC*1Ii", "nc", "vmem-pref-insts")

TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b32, "ii*1Ii", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b64, "V2iV2i*1Ii", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b128, "V4iV4i*1Ii", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b32, "ii*0Ii", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b64, "V2iV2i*0Ii", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b128, "V4iV4i*0Ii", "nc", "gfx1250-insts")

TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds, "vV4iV8iV4iV4iIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds_d2, "vV4iV8iIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_tensor_store_from_lds, "vV4iV8iV4iV4iIi", "nc", "gfx1250-insts")
Expand Down
35 changes: 35 additions & 0 deletions clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,41 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
return Builder.CreateCall(F, {Addr});
}
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {

Intrinsic::ID IID;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
IID = Intrinsic::amdgcn_global_load_monitor_b32;
break;
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
IID = Intrinsic::amdgcn_global_load_monitor_b64;
break;
case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
IID = Intrinsic::amdgcn_global_load_monitor_b128;
break;
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
IID = Intrinsic::amdgcn_flat_load_monitor_b32;
break;
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
IID = Intrinsic::amdgcn_flat_load_monitor_b64;
break;
case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
IID = Intrinsic::amdgcn_flat_load_monitor_b128;
break;
}

llvm::Type *LoadTy = ConvertType(E->getType());
llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
llvm::Value *Val = EmitScalarExpr(E->getArg(1));
llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
return Builder.CreateCall(F, {Addr, Val});
}
case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
// Should this have asan instrumentation?
return emitBuiltinWithOneOverloadedType<5>(*this, E,
Expand Down
66 changes: 66 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250

typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));

// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 1)
// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
//
int test_amdgcn_global_load_monitor_b32(global int* inptr)
{
return __builtin_amdgcn_global_load_monitor_b32(inptr, 1);
}

// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b64(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.global.load.monitor.b64.v2i32(ptr addrspace(1) [[INPTR:%.*]], i32 10)
// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]]
//
v2i test_amdgcn_global_load_monitor_b64(global v2i* inptr)
{
return __builtin_amdgcn_global_load_monitor_b64(inptr, 10);
}

// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b128(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.monitor.b128.v4i32(ptr addrspace(1) [[INPTR:%.*]], i32 22)
// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]]
//
v4i test_amdgcn_global_load_monitor_b128(global v4i* inptr)
{
return __builtin_amdgcn_global_load_monitor_b128(inptr, 22);
}

// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b32(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.flat.load.monitor.b32.i32(ptr [[INPTR:%.*]], i32 27)
// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
//
int test_amdgcn_flat_load_monitor_b32(int* inptr)
{
return __builtin_amdgcn_flat_load_monitor_b32(inptr, 27);
}

// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b64(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.flat.load.monitor.b64.v2i32(ptr [[INPTR:%.*]], i32 1)
// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]]
//
v2i test_amdgcn_flat_load_monitor_b64(v2i* inptr)
{
return __builtin_amdgcn_flat_load_monitor_b64(inptr, 1);
}

// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b128(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.flat.load.monitor.b128.v4i32(ptr [[INPTR:%.*]], i32 0)
// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]]
//
v4i test_amdgcn_flat_load_monitor_b128(v4i* inptr)
{
return __builtin_amdgcn_flat_load_monitor_b128(inptr, 0);
}
12 changes: 12 additions & 0 deletions clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-- -target-cpu gfx1250 -verify -S -o - %s

typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));
typedef int v8i __attribute__((ext_vector_type(8)));

Expand Down Expand Up @@ -28,6 +29,17 @@ void test__builtin_amdgcn_cvt_f16_bf8(int a, int b) {
__builtin_amdgcn_cvt_f16_bf8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_bf8' must be a constant integer}}
}

void test_amdgcn_load_monitor(global int* b32gaddr, global v2i* b64gaddr, global v4i* b128gaddr, int *b32faddr, v2i* b64faddr, v4i *b128faddr,
global int* b32out, global v2i* b64out, global v4i* b128out, int cpol)
{
*b32out = __builtin_amdgcn_global_load_monitor_b32(b32gaddr, cpol); // expected-error {{'__builtin_amdgcn_global_load_monitor_b32' must be a constant integer}}
*b64out = __builtin_amdgcn_global_load_monitor_b64(b64gaddr, cpol); // expected-error {{'__builtin_amdgcn_global_load_monitor_b64' must be a constant integer}}
*b128out = __builtin_amdgcn_global_load_monitor_b128(b128gaddr, cpol); // expected-error {{'__builtin_amdgcn_global_load_monitor_b128' must be a constant integer}}
*b32out = __builtin_amdgcn_flat_load_monitor_b32(b32faddr, cpol); // expected-error {{'__builtin_amdgcn_flat_load_monitor_b32' must be a constant integer}}
*b64out = __builtin_amdgcn_flat_load_monitor_b64(b64faddr, cpol); // expected-error {{'__builtin_amdgcn_flat_load_monitor_b64' must be a constant integer}}
*b128out = __builtin_amdgcn_flat_load_monitor_b128(b128faddr, cpol); // expected-error {{'__builtin_amdgcn_flat_load_monitor_b128' must be a constant integer}}
}

void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
{
__builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, cpol); // expected-error {{'__builtin_amdgcn_tensor_load_to_lds' must be a constant integer}}
Expand Down
21 changes: 21 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

def flat_ptr_ty : LLVMQualPointerType<0>;
def global_ptr_ty : LLVMQualPointerType<1>;
def local_ptr_ty : LLVMQualPointerType<3>;

Expand Down Expand Up @@ -3846,6 +3847,26 @@ def int_amdgcn_tensor_load_to_lds_d2 :
def int_amdgcn_tensor_store_from_lds_d2 :
ClangBuiltin<"__builtin_amdgcn_tensor_store_from_lds_d2">, AMDGPUTensorLoadStoreD2;

class AMDGPULoadMonitor<LLVMType ptr_ty>:
Intrinsic<
[llvm_any_ty],
[ptr_ty,
llvm_i32_ty], // gfx12+ cachepolicy:
// bits [0-2] = th
// bits [3-4] = scope
[IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
IntrWillReturn, IntrConvergent, IntrNoCallback, IntrNoFree],
"",
[SDNPMemOperand]
>;

def int_amdgcn_flat_load_monitor_b32 : AMDGPULoadMonitor<flat_ptr_ty>;
def int_amdgcn_flat_load_monitor_b64 : AMDGPULoadMonitor<flat_ptr_ty>;
def int_amdgcn_flat_load_monitor_b128 : AMDGPULoadMonitor<flat_ptr_ty>;
def int_amdgcn_global_load_monitor_b32 : AMDGPULoadMonitor<global_ptr_ty>;
def int_amdgcn_global_load_monitor_b64 : AMDGPULoadMonitor<global_ptr_ty>;
def int_amdgcn_global_load_monitor_b128 : AMDGPULoadMonitor<global_ptr_ty>;

/// Emit an addrspacecast without null pointer checking.
/// Should only be inserted by a pass based on analysis of an addrspacecast's src.
def int_amdgcn_addrspacecast_nonnull : DefaultAttrsIntrinsic<
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ def gi_global_offset :
def gi_global_saddr :
GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
GIComplexPatternEquiv<GlobalSAddr>;
def gi_global_saddr_cpol :
GIComplexOperandMatcher<s64, "selectGlobalSAddrCPol">,
GIComplexPatternEquiv<GlobalSAddrCPol>;
def gi_global_saddr_glc :
GIComplexOperandMatcher<s64, "selectGlobalSAddrGLC">,
GIComplexPatternEquiv<GlobalSAddrGLC>;
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2020,6 +2020,22 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
return true;
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(SDNode *N, SDValue Addr,
SDValue &SAddr, SDValue &VOffset,
SDValue &Offset,
SDValue &CPol) const {
bool ScaleOffset;
if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
return false;

// We are assuming CPol is always the last operand of the intrinsic.
auto PassedCPol =
N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
CPol = CurDAG->getTargetConstant(
(ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);
return true;
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
SDValue &SAddr, SDValue &VOffset,
SDValue &Offset,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset,
SDValue &CPol) const;
bool SelectGlobalSAddrCPol(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset,
SDValue &CPol) const;
bool SelectGlobalSAddrGLC(SDNode *N, SDValue Addr, SDValue &SAddr,
SDValue &VOffset, SDValue &Offset,
SDValue &CPol) const;
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5773,6 +5773,16 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
return selectGlobalSAddr(Root, 0);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddrCPol(MachineOperand &Root) const {
const MachineInstr &I = *Root.getParent();

// We are assuming CPol is always the last operand of the intrinsic.
auto PassedCPol =
I.getOperand(I.getNumOperands() - 1).getImm() & ~AMDGPU::CPol::SCAL;
return selectGlobalSAddr(Root, PassedCPol);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddrGLC(MachineOperand &Root) const {
return selectGlobalSAddr(Root, AMDGPU::CPol::GLC);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectGlobalSAddr(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectGlobalSAddrCPol(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectGlobalSAddrGLC(MachineOperand &Root) const;

InstructionSelector::ComplexRendererFns
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5180,6 +5180,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_ds_load_tr16_b128:
case Intrinsic::amdgcn_ds_load_tr4_b64:
case Intrinsic::amdgcn_ds_load_tr6_b96:
case Intrinsic::amdgcn_flat_load_monitor_b32:
case Intrinsic::amdgcn_flat_load_monitor_b64:
case Intrinsic::amdgcn_flat_load_monitor_b128:
case Intrinsic::amdgcn_global_load_monitor_b32:
case Intrinsic::amdgcn_global_load_monitor_b64:
case Intrinsic::amdgcn_global_load_monitor_b128:
case Intrinsic::amdgcn_ds_read_tr4_b64:
case Intrinsic::amdgcn_ds_read_tr6_b96:
case Intrinsic::amdgcn_ds_read_tr8_b64:
Expand Down
36 changes: 36 additions & 0 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ let WantsRoot = true in {

def GlobalSAddr : ComplexPattern<iPTR, 4, "SelectGlobalSAddr", [], [], -10>;
def GlobalSAddrGLC : ComplexPattern<iPTR, 4, "SelectGlobalSAddrGLC", [], [], -10>;
def GlobalSAddrCPol : ComplexPattern<iPTR, 4, "SelectGlobalSAddrCPol", [], [], -10>;
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [], -10>;
def ScratchSVAddr : ComplexPattern<iPTR, 4, "SelectScratchSVAddr", [], [], -10>;
}
Expand Down Expand Up @@ -1274,6 +1275,11 @@ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCN
(inst $vaddr, $offset)
>;

class FlatLoadPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FlatOffset i64:$vaddr, i32:$offset), (i32 timm:$cpol))),
(inst $vaddr, $offset, $cpol)
>;

class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
(inst $vaddr, $offset, 0, $in)
Expand Down Expand Up @@ -1324,6 +1330,16 @@ class FlatLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
(inst $saddr, $voffset, $offset, $cpol)
>;

class FlatLoadSignedPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), (i32 timm:$cpol))),
(inst $vaddr, $offset, $cpol)
>;

class GlobalLoadSaddrPat_CPOL <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddrCPol (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol), (i32 timm))),
(inst $saddr, $voffset, $offset, $cpol)
>;

class FlatStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
(node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset, CPol:$cpol)),
Expand Down Expand Up @@ -1519,6 +1535,16 @@ multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueTyp
}
}

multiclass GlobalFLATLoadPats_CPOL<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat_CPOL<inst, node, vt> {
let AddedComplexity = 10;
}

def : GlobalLoadSaddrPat_CPOL<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
let AddedComplexity = 11;
}
}

multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatSignedLoadPat_D16 <inst, node, vt> {
let AddedComplexity = 10;
Expand Down Expand Up @@ -2055,6 +2081,16 @@ let WaveSizePredicate = isWave32, OtherPredicates = [HasTransposeLoadF4F6Insts]
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR6_B96, int_amdgcn_global_load_tr6_b96, v3i32>;
}

let OtherPredicates = [isGFX125xOnly] in {
def : FlatLoadPat_CPOL <FLAT_LOAD_MONITOR_B32, int_amdgcn_flat_load_monitor_b32, i32>;
def : FlatLoadPat_CPOL <FLAT_LOAD_MONITOR_B64, int_amdgcn_flat_load_monitor_b64, v2i32>;
def : FlatLoadPat_CPOL <FLAT_LOAD_MONITOR_B128, int_amdgcn_flat_load_monitor_b128, v4i32>;

defm : GlobalFLATLoadPats_CPOL <GLOBAL_LOAD_MONITOR_B32, int_amdgcn_global_load_monitor_b32, i32>;
defm : GlobalFLATLoadPats_CPOL <GLOBAL_LOAD_MONITOR_B64, int_amdgcn_global_load_monitor_b64, v2i32>;
defm : GlobalFLATLoadPats_CPOL <GLOBAL_LOAD_MONITOR_B128, int_amdgcn_global_load_monitor_b128, v4i32>;
} // End SubtargetPredicate = isGFX125xOnly

let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1477,6 +1477,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_flat_load_monitor_b32:
case Intrinsic::amdgcn_flat_load_monitor_b64:
case Intrinsic::amdgcn_flat_load_monitor_b128:
case Intrinsic::amdgcn_global_load_monitor_b32:
case Intrinsic::amdgcn_global_load_monitor_b64:
case Intrinsic::amdgcn_global_load_monitor_b128:
case Intrinsic::amdgcn_ds_load_tr6_b96:
case Intrinsic::amdgcn_ds_load_tr4_b64:
case Intrinsic::amdgcn_ds_load_tr8_b64:
Expand Down Expand Up @@ -1603,10 +1609,16 @@ bool SITargetLowering::getAddrModeArguments(const IntrinsicInst *II,
case Intrinsic::amdgcn_ds_atomic_barrier_arrive_rtn_b64:
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin_num:
case Intrinsic::amdgcn_flat_load_monitor_b128:
case Intrinsic::amdgcn_flat_load_monitor_b32:
case Intrinsic::amdgcn_flat_load_monitor_b64:
case Intrinsic::amdgcn_global_atomic_csub:
case Intrinsic::amdgcn_global_atomic_fmax_num:
case Intrinsic::amdgcn_global_atomic_fmin_num:
case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
case Intrinsic::amdgcn_global_load_monitor_b128:
case Intrinsic::amdgcn_global_load_monitor_b32:
case Intrinsic::amdgcn_global_load_monitor_b64:
case Intrinsic::amdgcn_global_load_tr_b64:
case Intrinsic::amdgcn_global_load_tr_b128:
case Intrinsic::amdgcn_global_load_tr4_b64:
Expand Down
Loading