Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,10 @@ def FeatureLshlAddU64Inst
: SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true",
"Has v_lshl_add_u64 instruction">;

// Subtarget feature "add-sub-u64-insts": when enabled it sets the
// HasAddSubU64Insts subtarget field to "true", marking targets that have the
// v_add_u64 and v_sub_u64 instructions.
def FeatureAddSubU64Insts
: SubtargetFeature<"add-sub-u64-insts", "HasAddSubU64Insts", "true",
"Has v_add_u64 and v_sub_u64 instructions">;

def FeatureMemToLDSLoad : SubtargetFeature<"vmem-to-lds-load-insts",
"HasVMemToLDSLoad",
"true",
Expand Down Expand Up @@ -2010,6 +2014,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureKernargPreload,
FeatureLshlAddU64Inst,
FeatureAddSubU64Insts,
FeatureLdsBarrierArriveAtomic,
FeatureSetPrioIncWgInst,
]>;
Expand Down Expand Up @@ -2787,6 +2792,9 @@ def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">,
def HasLshlAddU64Inst : Predicate<"Subtarget->hasLshlAddU64Inst()">,
AssemblerPredicate<(all_of FeatureLshlAddU64Inst)>;

// Selection predicate that holds when Subtarget->hasAddSubU64Insts() is true;
// the matching assembler predicate requires FeatureAddSubU64Insts.
def HasAddSubU64Insts : Predicate<"Subtarget->hasAddSubU64Insts()">,
AssemblerPredicate<(all_of FeatureAddSubU64Insts)>;

def HasLdsBarrierArriveAtomic : Predicate<"Subtarget->hasLdsBarrierArriveAtomic()">,
AssemblerPredicate<(all_of FeatureLdsBarrierArriveAtomic)>;

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4208,6 +4208,9 @@ bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper,
assert(Ty.isScalar());

unsigned Size = Ty.getSizeInBits();
if (ST.hasVectorMulU64() && Size == 64)
return true;

unsigned NumParts = Size / 32;
assert((Size % 32) == 0);
assert(NumParts >= 2);
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2528,7 +2528,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// Special case for s_mul_u64. Subtargets without a 64-bit vector multiply
// (see hasVectorMulU64()) have no vector equivalent of s_mul_u64. Hence, on
// those subtargets we have to break down s_mul_u64 into 32-bit vector
// multiplications.
if (Opc == AMDGPU::G_MUL && DstTy.getSizeInBits() == 64) {
if (!Subtarget.hasVectorMulU64() && Opc == AMDGPU::G_MUL &&
DstTy.getSizeInBits() == 64) {
applyMappingSMULU64(B, OpdMapper);
return;
}
Expand Down Expand Up @@ -3973,7 +3974,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
} else {
OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
if (MI.getOpcode() == AMDGPU::G_MUL && Subtarget.hasVectorMulU64())
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
else
OpdsMapping[0] =
getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI /*, DefaultBankID*/);
OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);

Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMinimum3Maximum3F16 = false;
bool HasMinimum3Maximum3PKF16 = false;
bool HasLshlAddU64Inst = false;
bool HasAddSubU64Insts = false;
bool HasPointSampleAccel = false;
bool HasLdsBarrierArriveAtomic = false;
bool HasSetPrioIncWgInst = false;
Expand Down Expand Up @@ -1500,6 +1501,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasVOPD3() const { return GFX1250Insts; }

// \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.
// Reflects the HasAddSubU64Insts subtarget flag.
bool hasAddSubU64Insts() const { return HasAddSubU64Insts; }

// \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.
// Keyed off GFX1250Insts rather than a dedicated subtarget feature flag.
bool hasVectorMulU64() const { return GFX1250Insts; }

// \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.
bool hasPkAddMinMaxInsts() const { return GFX1250Insts; }

Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,

setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);

if (Subtarget->hasScalarSMulU64())
if (Subtarget->hasVectorMulU64())
setOperationAction(ISD::MUL, MVT::i64, Legal);
else if (Subtarget->hasScalarSMulU64())
setOperationAction(ISD::MUL, MVT::i64, Custom);

if (Subtarget->hasMad64_32())
Expand Down Expand Up @@ -5421,6 +5423,19 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineOperand &Src0 = MI.getOperand(1);
MachineOperand &Src1 = MI.getOperand(2);

if (ST.hasAddSubU64Insts()) {
auto I = BuildMI(*BB, MI, DL,
TII->get(IsAdd ? AMDGPU::V_ADD_U64_e64
: AMDGPU::V_SUB_U64_e64),
Dest.getReg())
.add(Src0)
.add(Src1)
.addImm(0); // clamp
TII->legalizeOperands(*I);
MI.eraseFromParent();
return BB;
}

if (IsAdd && ST.hasLshlAddU64Inst()) {
auto Add = BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_LSHL_ADD_U64_e64),
Dest.getReg())
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7361,6 +7361,10 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
}

case AMDGPU::S_MUL_U64:
if (ST.hasVectorMulU64()) {
NewOpcode = AMDGPU::V_MUL_U64_e64;
break;
}
// No 64-bit vector multiply: split s_mul_u64 into 32-bit vector multiplications.
splitScalarSMulU64(Worklist, Inst, MDT);
Inst.eraseFromParent();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2914,6 +2914,7 @@ def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I16_F32_F32 : VOPProfile <[i16, f32, f32, untyped]>;
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
// i64 result with two i64 sources, with the clamp operand enabled.
def VOP_I64_I64_I64_ARITH : VOPProfile <[i64, i64, i64, untyped], /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,17 @@ let isAdd = 1 in {
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32">;
}

// 64-bit integer VALU add/sub/mul pseudos. All three are marked
// rematerializable. Add and sub are gated on HasAddSubU64Insts and use the
// Write64Bit scheduling class; mul is gated on isGFX1250Plus and uses
// WriteDouble.
let isReMaterializable = 1 in {
let SubtargetPredicate = HasAddSubU64Insts, SchedRW = [Write64Bit] in {
// Note the assembly mnemonics are the "_nc_" spellings (v_add_nc_u64 /
// v_sub_nc_u64) even though the pseudo names are V_ADD_U64 / V_SUB_U64.
defm V_ADD_U64 : VOP2Inst <"v_add_nc_u64", VOP_I64_I64_I64_ARITH>;
// We don't actually have something like V_SUBREV_U64 so V_SUB_U64 can't be treated as commutable.
let isCommutable = 0 in
defm V_SUB_U64 : VOP2Inst <"v_sub_nc_u64", VOP_I64_I64_I64_ARITH>;
} // End SubtargetPredicate = HasAddSubU64Insts, SchedRW = [Write64Bit]
// The multiply uses the non-clamp profile and carries a selection pattern
// for divergent `mul`.
let SubtargetPredicate = isGFX1250Plus, SchedRW = [WriteDouble] in
defm V_MUL_U64 : VOP2Inst <"v_mul_u64", VOP_I64_I64_I64, DivergentBinFrag<mul>>;
} // End isReMaterializable = 1

} // End isCommutable = 1

// These are special and do not read the exec mask.
Expand Down Expand Up @@ -1754,6 +1765,9 @@ multiclass VOP2_Real_FULL_with_name<GFXGen Gen, bits<6> op, string opName,
VOP2_Realtriple_e64_with_name<Gen, op, opName, asmName>,
VOP2_Real_NO_VOP3_with_name<Gen, op, opName, asmName>;

// Real encodings for a VOP2 opcode `op` on generation `Gen`, composed only of
// the e32 and e64 forms (VOP2_Real_e32 + VOP2_Real_e64).
multiclass VOP2_Real_NO_DPP<GFXGen Gen, bits<6> op> :
VOP2_Real_e32<Gen, op>, VOP2_Real_e64<Gen, op>;

multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName,
string asmName> {
defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName>,
Expand Down Expand Up @@ -1843,6 +1857,9 @@ defm V_FMAC_F64 : VOP2_Real_FULL<GFX12Gen, 0x17>;

defm V_FMAMK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x23>;
defm V_FMAAK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x24>;
defm V_ADD_U64 : VOP2_Real_FULL<GFX1250Gen, 0x28>;
defm V_SUB_U64 : VOP2_Real_FULL<GFX1250Gen, 0x29>;
defm V_MUL_U64 : VOP2_Real_NO_DPP<GFX1250Gen, 0x2a>;

//===----------------------------------------------------------------------===//
// GFX11.
Expand Down
Loading