Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,12 @@ def FeatureSafeSmemPrefetch : SubtargetFeature<"safe-smem-prefetch",
"SMEM prefetches do not fail on illegal address"
>;

// Subtarget feature "safe-cu-prefetch": when enabled, the HasSafeCUPrefetch
// subtarget field is set to "true". It indicates that VMEM prefetches at CU
// scope do not fail on an illegal address.
def FeatureSafeCUPrefetch : SubtargetFeature<"safe-cu-prefetch",
"HasSafeCUPrefetch",
"true",
"VMEM CU scope prefetches do not fail on illegal address"
>;

def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
"HasVcmpxExecWARHazard",
"true",
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -446,5 +446,8 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
GISDNodeXFormEquiv<as_hw_round_mode>;

// GlobalISel equivalent of the PrefetchLoc SDNodeXForm: the operand is
// produced by the instruction selector's renderPrefetchLoc member function.
def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
GISDNodeXFormEquiv<PrefetchLoc>;

def gi_MFMALdScaleModifierOp : GICustomOperandRenderer<"renderScaledMAIIntrinsicOperand">,
GISDNodeXFormEquiv<MFMALdScaleXForm>;
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Target/TargetMachine.h"

namespace llvm {
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7068,6 +7068,17 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
}

/// Render the cache-policy scope immediate for a prefetch instruction.
///
/// Reads the immediate stored in operand 2 of \p MI, keeps only its low
/// SCOPE_MASK bits, inverts it (SCOPE_MASK - value) and shifts the result into
/// the scope field position. The resulting immediate is appended to \p MIB.
/// \p OpIdx is not consulted; the source operand index is fixed at 2.
void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  const uint32_t Loc = MI.getOperand(2).getImm();
  const uint32_t InvertedLoc =
      AMDGPU::CPol::SCOPE_MASK - (Loc & AMDGPU::CPol::SCOPE_MASK);
  uint32_t Scope = InvertedLoc << AMDGPU::CPol::SCOPE_SHIFT;

  // CU scope is unsafe unless the subtarget reports safe CU prefetch, so the
  // scope is raised to at least SE in that case.
  if (!Subtarget->hasSafeCUPrefetch())
    Scope = std::max(Scope, static_cast<uint32_t>(AMDGPU::CPol::SCOPE_SE));

  MIB.addImm(Scope);
}

/// Convert from 2-bit value to enum values used for op_sel* source modifiers.
void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,10 @@ class AMDGPUInstructionSelector final : public InstructionSelector {

void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

/// Custom operand renderer for prefetch selection: adds to \p MIB the scope
/// immediate derived from an immediate operand of \p MI.
void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

void renderScaledMAIIntrinsicOperand(MachineInstrBuilder &MIB,
const MachineInstr &MI, int OpIdx) const;

Expand Down
13 changes: 9 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3501,19 +3501,24 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingMAD_64_32(B, OpdMapper);
return;
case AMDGPU::G_PREFETCH: {
if (!Subtarget.hasPrefetch() || !Subtarget.hasSafeSmemPrefetch()) {
if (!Subtarget.hasSafeSmemPrefetch() && !Subtarget.hasVmemPrefInsts()) {
MI.eraseFromParent();
return;
}
Register PtrReg = MI.getOperand(0).getReg();
unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID);
if (PtrBank == AMDGPU::VGPRRegBankID) {
if (PtrBank == AMDGPU::VGPRRegBankID &&
(!Subtarget.hasVmemPrefInsts() || !MI.getOperand(3).getImm())) {
// Cannot do I$ prefetch with divergent pointer.
MI.eraseFromParent();
return;
}
unsigned AS = MRI.getType(PtrReg).getAddressSpace();
if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
if ((!AMDGPU::isFlatGlobalAddrSpace(AS) &&
AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
(!Subtarget.hasSafeSmemPrefetch() &&
(AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
!MI.getOperand(3).getImm() /* I$ prefetch */))) {
MI.eraseFromParent();
return;
}
Expand Down
52 changes: 52 additions & 0 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2184,6 +2184,50 @@ defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f

} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]

// Transforms a target-constant immediate into a cache-policy scope value:
// the low SCOPE_MASK bits of the input are subtracted from SCOPE_MASK and the
// result is shifted left by SCOPE_SHIFT. On subtargets without safe CU
// prefetch the value is raised to at least SCOPE_SE. The result is emitted as
// an i32 target constant.
def PrefetchLoc: SDNodeXForm<timm, [{
uint32_t V = N->getZExtValue();
V = (AMDGPU::CPol::SCOPE_MASK - (V & AMDGPU::CPol::SCOPE_MASK)) << AMDGPU::CPol::SCOPE_SHIFT;
if (!Subtarget->hasSafeCUPrefetch())
V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);
}]>;

// Matches a prefetch node whose memory access is in the FLAT address space.
// The GISel predicate performs the same check on the first memory operand.
def prefetch_flat : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
[{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; }]> {
let GISelPredicateCode = [{
return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
}];
}

// Matches a prefetch node whose memory access is in the GLOBAL address space,
// or in the CONSTANT address space when the subtarget does not have safe SMEM
// prefetch. The GISel predicate performs the same check on the first memory
// operand.
def prefetch_global : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
[{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
(cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
!Subtarget->hasSafeSmemPrefetch()); }]> {
let GISelPredicateCode = [{
return (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
((*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
!Subtarget->hasSafeSmemPrefetch());
}];
}

// Selection patterns that map a prefetch matched by prefetch_kind, with the
// given rw operand and a last operand equal to i32imm_one, onto the FLAT
// pseudo instruction named by inst. Two addressing forms are provided:
//   - inst:          GlobalOffset addressing ($vaddr, $offset).
//   - inst#"_SADDR": GlobalSAddr addressing ($saddr, $voffset, $offset).
// In both forms the $loc immediate is converted through PrefetchLoc.
// AddedComplexity depends on rw: 0 / 11 when rw is i32imm_zero, otherwise
// 25 / 30 (vaddr form / SADDR form respectively).
multiclass FlatPrefetchPats<string inst, SDPatternOperator prefetch_kind, SDPatternOperator rw> {
def : GCNPat <
(prefetch_kind (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
(!cast<FLAT_Pseudo>(inst) $vaddr, $offset, (i32 (PrefetchLoc $loc)))
> {
let AddedComplexity = !if(!eq(rw, i32imm_zero), 0, 25);
}

def : GCNPat <
(prefetch_kind (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), rw, (i32 timm:$loc), i32imm_one),
(!cast<FLAT_Pseudo>(inst#"_SADDR") $saddr, $voffset, $offset, (i32 (PrefetchLoc $loc)))
> {
let AddedComplexity = !if(!eq(rw, i32imm_zero), 11, 30);
}
}

multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
def : GCNPat <
(intr (FlatOffset i64:$vaddr, i32:$offset), timm:$cpol),
Expand All @@ -2198,6 +2242,14 @@ multiclass FlatIntrPrefetchPats<string inst, SDPatternOperator intr> {
}

let SubtargetPredicate = HasVmemPrefInsts in {
defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_zero>;
defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_zero>;

// Patterns for forced vector prefetch with rw = 1.
defm : FlatPrefetchPats<"FLAT_PREFETCH_B8", prefetch_flat, i32imm_one>;
defm : FlatPrefetchPats<"GLOBAL_PREFETCH_B8", prefetch_global, i32imm_one>;


// Patterns for target intrinsics
defm : FlatIntrPrefetchPats<"FLAT_PREFETCH_B8", int_amdgcn_flat_prefetch>;
defm : FlatIntrPrefetchPats<"GLOBAL_PREFETCH_B8", int_amdgcn_global_prefetch>;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasInstFwdPrefetchBug = false;
bool HasVmemPrefInsts = false;
bool HasSafeSmemPrefetch = false;
bool HasSafeCUPrefetch = false;
bool HasVcmpxExecWARHazard = false;
bool HasLdsBranchVmemWARHazard = false;
bool HasNSAtoVMEMBug = false;
Expand Down Expand Up @@ -995,6 +996,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

// True if SMEM prefetches do not fail on an illegal address.
bool hasSafeSmemPrefetch() const { return HasSafeSmemPrefetch; }

// True if VMEM CU scope prefetches do not fail on an illegal address.
bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }

// Has s_cmpk_* instructions.
bool hasSCmpK() const { return getGeneration() < GFX12; }

Expand Down
12 changes: 7 additions & 5 deletions llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -392,11 +392,13 @@ enum CPol {
TH_ATOMIC_CASCADE = 4, // Cascading vs regular

// Scope
SCOPE = 0x3 << 3, // All Scope bits
SCOPE_CU = 0 << 3,
SCOPE_SE = 1 << 3,
SCOPE_DEV = 2 << 3,
SCOPE_SYS = 3 << 3,
SCOPE_SHIFT = 3,
SCOPE_MASK = 0x3,
SCOPE = SCOPE_MASK << SCOPE_SHIFT, // All Scope bits
SCOPE_CU = 0 << SCOPE_SHIFT,
SCOPE_SE = 1 << SCOPE_SHIFT,
SCOPE_DEV = 2 << SCOPE_SHIFT,
SCOPE_SYS = 3 << SCOPE_SHIFT,

NV = 1 << 5, // Non-volatile bit

Expand Down
15 changes: 12 additions & 3 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasMad64_32())
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);

if (Subtarget->hasPrefetch() && Subtarget->hasSafeSmemPrefetch())
if (Subtarget->hasSafeSmemPrefetch() || Subtarget->hasVmemPrefInsts())
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

if (Subtarget->hasIEEEMinimumMaximumInsts()) {
Expand Down Expand Up @@ -4444,19 +4444,28 @@ SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
}

SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
if (Op->isDivergent())
if (Op->isDivergent() &&
(!Subtarget->hasVmemPrefInsts() || !Op.getConstantOperandVal(4)))
// Cannot do I$ prefetch with divergent pointer.
return SDValue();

switch (cast<MemSDNode>(Op)->getAddressSpace()) {
case AMDGPUAS::FLAT_ADDRESS:
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
break;
case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
if (Subtarget->hasSafeSmemPrefetch())
break;
[[fallthrough]];
default:
return SDValue();
}

// I$ prefetch
if (!Subtarget->hasSafeSmemPrefetch() && !Op.getConstantOperandVal(4))
return SDValue();

return Op;
}

Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -856,9 +856,9 @@ def smrd_sextloadi16 : SMRDLoadPat<sextloadi16>;

def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
(prefetch node:$ptr, node:$rw, node:$loc, node:$type),
[{ return !N->getOperand(1)->isDivergent();}]> {
[{ return !N->getOperand(1)->isDivergent() && Subtarget->hasSafeSmemPrefetch();}]> {
let GISelPredicateCode = [{
return isInstrUniform(MI);
return isInstrUniform(MI) && Subtarget->hasSafeSmemPrefetch();
}];
}

Expand Down Expand Up @@ -1152,6 +1152,7 @@ multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
}

defm : SMPrefetchPat<"INST", i32imm_zero>;
let AddedComplexity = 12 in // Prefer scalar prefetch over global for r/o case.
defm : SMPrefetchPat<"DATA", i32imm_one>;

let SubtargetPredicate = isGFX12Plus in {
Expand Down
Loading