33 changes: 31 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -447,14 +447,42 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
     return;
   }
 
+  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
+  if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
+      CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) {
+    uint64_t C = 0;
+    bool AllConst = true;
+    unsigned EltSize = EltVT.getSizeInBits();
+    for (unsigned I = 0; I < NumVectorElts; ++I) {
+      SDValue Op = N->getOperand(I);
+      if (Op.isUndef()) {
+        AllConst = false;
+        break;
+      }
+      uint64_t Val;
+      if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) {
+        Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
+      } else
+        Val = cast<ConstantSDNode>(Op)->getZExtValue();
Comment on lines +463 to +466

Contributor suggested change:

-      if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) {
-        Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
-      } else
-        Val = cast<ConstantSDNode>(Op)->getZExtValue();
+      if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op))
+        Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
+      else
+        Val = cast<ConstantSDNode>(Op)->getZExtValue();

Collaborator (author): Matt was asking to use braces if we have defined a variable inside the if.

Contributor (@shiltian, Jul 14, 2025): Hmm, okay, then use {} for the else as well; using braces on only half of the branches is not a good idea. I'm not sure the LLVM coding standard says we need {} for a variable definition.

Collaborator (author): And again, I've been asked multiple times not to use braces on the else in this situation ;)

+      C |= Val << (EltSize * I);
+    }
+    if (AllConst) {
+      SDValue CV = CurDAG->getTargetConstant(C, DL, MVT::i64);
+      MachineSDNode *Copy =
+          CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
+      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
+                           RegClass);
+      return;
+    }
+  }
+
   assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                 "supported yet");
   // 32 = Max Num Vector Elements
   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
   // 1 = Vector Register Class
   SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
 
-  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
   bool IsRegSeq = true;
   unsigned NOps = N->getNumOperands();
@@ -676,7 +704,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {

   case ISD::Constant:
   case ISD::ConstantFP: {
-    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
+        Subtarget->has64BitLiterals())
       break;
 
     uint64_t Imm;
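Aside: the packing loop in the SelectBuildVector hunk above folds all constant elements of a 64-bit vector into one immediate, low element first, so a single S_MOV_B64_IMM_PSEUDO can materialize it. A minimal standalone sketch of that packing logic (function name and test values are illustrative, not from the patch):

#include <cassert>
#include <cstdint>

// Pack NumElts constants of EltSizeBits each into one 64-bit immediate,
// element 0 in the low bits -- mirrors `C |= Val << (EltSize * I)` above.
// Elements are assumed already zero-extended to 64 bits, as getZExtValue()
// and bitcastToAPInt() guarantee in the patch.
static uint64_t packConstants(const uint64_t *Elts, unsigned NumElts,
                              unsigned EltSizeBits) {
  assert(NumElts * EltSizeBits == 64 && "only 64-bit vectors qualify");
  uint64_t C = 0;
  for (unsigned I = 0; I < NumElts; ++I)
    C |= Elts[I] << (EltSizeBits * I);
  return C;
}

int main() {
  // A v2i32 build_vector <0xDEADBEEF, 0x12345678> becomes one 64-bit literal.
  const uint64_t Elts[2] = {0xDEADBEEF, 0x12345678};
  assert(packConstants(Elts, 2, 32) == 0x12345678DEADBEEFull);
  return 0;
}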
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12155,6 +12155,11 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
   if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
        bitOpWithConstantIsReducible(Opc, ValHi)) ||
       (CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
+    // We have 64-bit scalar and/or/xor, but do not have vector forms.
+    if (Subtarget->has64BitLiterals() && CRHS->hasOneUse() &&
+        !CRHS->user_begin()->isDivergent())
+      return SDValue();
+
     // If we need to materialize a 64-bit immediate, it will be split up later
     // anyway. Avoid creating the harder to understand 64-bit immediate
     // materialization.
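The early-out added above relies on the scalar unit having 64-bit and/or/xor while the vector unit does not: if the constant has a single, uniform user and the subtarget can encode a 64-bit literal, splitting the operation into 32-bit halves only loses. A hedged model of that decision, with names of my own (the real code queries CRHS and the subtarget as shown in the diff):

// Model of the bail-out in splitBinaryBitConstantOp: keep the 64-bit
// and/or/xor whole when it can select to a scalar instruction taking a
// full 64-bit literal, e.g. s_and_b64 s[0:1], s[2:3], lit64.
static bool keepWhole64BitBitop(bool Has64BitLiterals, bool ConstHasOneUse,
                                bool UserIsDivergent) {
  return Has64BitLiterals && ConstHasOneUse && !UserIsDivergent;
}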
31 changes: 28 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2273,6 +2273,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
     const MachineOperand &SrcOp = MI.getOperand(1);
     assert(!SrcOp.isFPImm());
+
+    if (ST.has64BitLiterals()) {
+      MI.setDesc(get(AMDGPU::S_MOV_B64));
+      break;
+    }
+
     APInt Imm(64, SrcOp.getImm());
     if (Imm.isIntN(32) || isInlineConstant(Imm)) {
       MI.setDesc(get(AMDGPU::S_MOV_B64));
@@ -6099,14 +6105,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
     if (Is64BitOp &&
         !AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
-      if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
+      if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
+          (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
         return false;
 
       // FIXME: We can use sign extended 64-bit literals, but only for signed
       // operands. At the moment we do not know if an operand is signed.
       // Such operand will be encoded as its low 32 bits and then either
       // correctly sign extended or incorrectly zero extended by HW.
-      if (!Is64BitFPOp && (int32_t)Imm < 0)
+      // If 64-bit literals are supported and the literal will be encoded
+      // as full 64 bit we still can use it.
+      if (!Is64BitFPOp && (int32_t)Imm < 0 &&
+          (!ST.has64BitLiterals() || AMDGPU::isValid32BitLiteral(Imm, false)))
         return false;
     }
   }
@@ -9178,15 +9188,30 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     if (isDPP(MI))
       return DescSize;
     bool HasLiteral = false;
+    unsigned LiteralSize = 4;
     for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
       const MachineOperand &Op = MI.getOperand(I);
       const MCOperandInfo &OpInfo = Desc.operands()[I];
       if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) {
         HasLiteral = true;
+        if (ST.has64BitLiterals()) {
+          switch (OpInfo.OperandType) {
+          default:
+            break;
+          case AMDGPU::OPERAND_REG_IMM_FP64:
+            if (!AMDGPU::isValid32BitLiteral(Op.getImm(), true))
+              LiteralSize = 8;
+            break;
+          case AMDGPU::OPERAND_REG_IMM_INT64:
+            if (!Op.isImm() || !AMDGPU::isValid32BitLiteral(Op.getImm(), false))
+              LiteralSize = 8;
+            break;
+          }
+        }
         break;
       }
     }
-    return HasLiteral ? DescSize + 4 : DescSize;
+    return HasLiteral ? DescSize + LiteralSize : DescSize;
   }
 
   // Check whether we have extra NSA words.
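Both SIInstrInfo hunks above hinge on AMDGPU::isValid32BitLiteral. As a point of reference, its semantics can be modeled as follows (a sketch under the assumption that it matches AMDGPUBaseInfo, not the verbatim implementation): an integer literal still fits in 32 bits if it zero-extends back to the original value, and an FP64 literal fits if its low half is zero, because hardware encodes only the high 32 bits of a double literal.

#include <cstdint>

// Sketch of AMDGPU::isValid32BitLiteral semantics (assumed; see
// AMDGPUBaseInfo for the authoritative version). FP64 literals encode the
// high half of the double and hardware zero-fills the low half, so the
// value must have zero low 32 bits; integer literals must simply fit in
// an unsigned 32-bit encoding.
static bool isValid32BitLiteralModel(uint64_t Val, bool IsFP64) {
  return IsFP64 ? (Val & 0xffffffffull) == 0 : Val <= 0xffffffffull;
}

Under this model, getInstSizeInBytes above charges 8 literal bytes instead of 4 exactly when a 64-bit operand fails this test.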
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1058,7 +1058,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
       // fold an immediate into the shrunk instruction as a literal operand. In
       // GFX10 VOP3 instructions can take a literal operand anyway, so there is
       // no advantage to doing this.
-      if (ST->hasVOP3Literal() && !IsPostRA)
+      // However, if 64-bit literals are allowed we still need to shrink it
+      // for such literal to be able to fold.
+      if (ST->hasVOP3Literal() &&
+          (!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) &&
+          !IsPostRA)
         continue;
 
       if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
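Restated compactly (an illustrative predicate, not code from the patch): pre-RA shrinking is still skipped when VOP3 can take the literal directly, unless the subtarget supports 64-bit literals, in which case the instruction must be shrunk for such a literal to fold; true16 instructions keep the old behavior.

// Illustrative gate for the `continue` above: true when shrinking the
// VOP3 instruction to VOP2 would buy nothing.
static bool skipShrink(bool HasVOP3Literal, bool Has64BitLiterals,
                       bool IsTrue16Inst, bool IsPostRA) {
  return HasVOP3Literal && (!Has64BitLiterals || IsTrue16Inst) && !IsPostRA;
}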
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -639,6 +639,7 @@ bool isMAC(unsigned Opc) {
          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
          Opc == AMDGPU::V_MAC_F16_e64_vi ||
          Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
+         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
          Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
19 changes: 14 additions & 5 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -175,10 +175,14 @@ multiclass VOP2Inst_e64<string opName,
   def _e64 : VOP3InstBase <opName, P, node, 1>,
              Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
 
-  let SubtargetPredicate = isGFX11Plus in {
-    if P.HasExtVOP3DPP then
-      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
-  } // End SubtargetPredicate = isGFX11Plus
+  if P.HasExtVOP3DPP then
+    def _e64_dpp : VOP3_DPP_Pseudo <opName, P> {
+      let SubtargetPredicate = isGFX11Plus;
+    }
+  else if P.HasExt64BitDPP then
+    def _e64_dpp : VOP3_DPP_Pseudo <opName, P> {
+      let OtherPredicates = [HasDPALU_DPP];
+    }
 }
 
 multiclass VOP2Inst_e64_VOPD<string opName,
@@ -1492,7 +1496,9 @@ class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
     VOP2_DPP<op, ps, opName, p, 1> {
   let AssemblerPredicate = HasDPP16;
   let SubtargetPredicate = ps.SubtargetPredicate;
-  let OtherPredicates = ps.OtherPredicates;
+  let OtherPredicates = !listconcat(ps.OtherPredicates,
+                          !if(p.HasExt64BitDPP, [HasDPALU_DPP], []),
+                          !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []));
 }
 
 class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
@@ -1832,6 +1838,9 @@ let SubtargetPredicate = isGFX12Plus in {
     V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
 } // End SubtargetPredicate = isGFX12Plus
 
+let SubtargetPredicate = HasFmacF64Inst in
+defm V_FMAC_F64 : VOP2_Real_FULL<GFX12Gen, 0x17>;
+
 defm V_FMAMK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x23>;
 defm V_FMAAK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x24>;
