-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] MC support for v_fmaak_f64/v_fmamk_f64 gfx1250 instructions #148282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
rampitec
merged 1 commit into
main
from
users/rampitec/07-11-_amdgpu_mc_support_for_v_fmaak_f64_v_fmamk_f64_gfx1250_intructions
Jul 11, 2025
Merged
[AMDGPU] MC support for v_fmaak_f64/v_fmamk_f64 gfx1250 instructions #148282
rampitec
merged 1 commit into
main
from
users/rampitec/07-11-_amdgpu_mc_support_for_v_fmaak_f64_v_fmamk_f64_gfx1250_intructions
Jul 11, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) Changes: Patch is 42.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148282.diff 17 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 91ace4d2b7f16..31420caca0899 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2488,6 +2488,10 @@ def HasFmaakFmamkF32Insts :
Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
+def HasFmaakFmamkF64Insts :
+ Predicate<"Subtarget->hasFmaakFmamkF64Insts()">,
+ AssemblerPredicate<(any_of FeatureGFX1250Insts)>;
+
def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
AssemblerPredicate<(all_of FeatureImageInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 35de49c27b32a..886de501dadac 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -951,6 +951,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isLiteralImm(MVT::f16);
}
+ bool isKImmFP64() const {
+ return isLiteralImm(MVT::f64);
+ }
+
bool isMem() const override {
return false;
}
@@ -2003,6 +2007,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ case AMDGPU::OPERAND_KIMM64:
return &APFloat::IEEEdouble();
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
@@ -2343,6 +2348,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// in predicate methods (isLiteralImm())
llvm_unreachable("fp literal in 64-bit integer instruction.");
+ case AMDGPU::OPERAND_KIMM64:
+ Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindMandatoryLiteral();
+ return;
+
case AMDGPU::OPERAND_REG_IMM_BF16:
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
@@ -2548,6 +2558,13 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
setImmKindMandatoryLiteral();
return;
+ case AMDGPU::OPERAND_KIMM64:
+ if ((isInt<32>(Val) || isUInt<32>(Val)) && !getModifiers().Lit64)
+ Val <<= 32;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ setImmKindMandatoryLiteral();
+ return;
default:
llvm_unreachable("invalid operand size");
}
@@ -4992,7 +5009,7 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
unsigned NumExprs = 0;
unsigned NumLiterals = 0;
- uint32_t LiteralValue;
+ uint64_t LiteralValue;
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
@@ -5006,16 +5023,21 @@ bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
uint64_t Value = static_cast<uint64_t>(MO.getImm());
- bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
+ bool IsForcedFP64 =
+ Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
+ (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
+ HasMandatoryLiteral);
+ bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
- if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
+ if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
+ !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
Error(getLitLoc(Operands), "invalid operand for instruction");
return false;
}
- if (IsFP64 && IsValid32Op)
+ if (IsFP64 && IsValid32Op && !IsForcedFP64)
Value = Hi_32(Value);
if (NumLiterals == 0 || LiteralValue != Value) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 7b1ea11d58168..a19927b2c2f0c 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -353,6 +353,13 @@ static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}
+static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
+}
+
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
uint64_t Addr, const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
@@ -613,6 +620,15 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
break;
+ if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
+ // Return 8 bytes for a potential literal.
+ Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
+
+ if (isGFX1250() &&
+ tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
+ break;
+ }
+
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -1467,6 +1483,17 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
return MCOperand::createImm(Literal);
}
+MCOperand
+AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
+ if (HasLiteral) {
+ if (Literal64 != Val)
+ return errOperand(Val, "More than one unique literal is illegal");
+ }
+ HasLiteral = true;
+ Literal = Literal64 = Val;
+ return MCOperand::createImm(Literal64);
+}
+
MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 8927f208fd2af..84041001b6ba7 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -178,6 +178,7 @@ class AMDGPUDisassembler : public MCDisassembler {
static MCOperand decodeIntImmed(unsigned Imm);
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
+ MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const;
MCOperand decodeLiteralConstant(bool ExtendFP64) const;
MCOperand decodeLiteral64Constant() const;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 8ea60871b6613..e6dd98a104209 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1100,6 +1100,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= GFX10 || hasGFX940Insts();
}
+ bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }
+
bool hasImageInsts() const {
return HasImageInsts;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 8ce12dfeda779..cb6319ed627ca 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -76,6 +76,18 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
}
+void AMDGPUInstPrinter::printFP64ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // KIMM64
+ // This part needs to align with AMDGPUInstPrinter::printImmediate64.
+ uint64_t Imm = MI->getOperand(OpNo).getImm();
+ if (STI.hasFeature(AMDGPU::Feature64BitLiterals) && Lo_32(Imm))
+ O << "lit64(" << formatHex(static_cast<uint64_t>(Imm)) << ')';
+ else
+ O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
+}
+
void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
raw_ostream &O, StringRef BitName) {
if (MI->getOperand(OpNo).getImm()) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 071e0a9d0fee6..fb803b1f81342 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -42,6 +42,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFP64ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef BitName);
void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index f0f655e93f4cc..2cd8af918f324 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -87,9 +87,10 @@ class AMDGPUMCCodeEmitter : public MCCodeEmitter {
const MCSubtargetInfo &STI) const;
/// Encode an fp or int literal.
- std::optional<uint32_t> getLitEncoding(const MCOperand &MO,
- const MCOperandInfo &OpInfo,
- const MCSubtargetInfo &STI) const;
+ std::optional<uint64_t>
+ getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
+ const MCSubtargetInfo &STI,
+ bool HasMandatoryLiteral = false) const;
void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
APInt &Inst, APInt &Scratch,
@@ -265,10 +266,11 @@ static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI,
: 255;
}
-std::optional<uint32_t>
+std::optional<uint64_t>
AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
const MCOperandInfo &OpInfo,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI,
+ bool HasMandatoryLiteral) const {
int64_t Imm;
if (MO.isExpr()) {
if (!MO.getExpr()->evaluateAsAbsolute(Imm))
@@ -303,9 +305,13 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- case AMDGPU::OPERAND_REG_IMM_FP64:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI, true);
+ case AMDGPU::OPERAND_REG_IMM_FP64: {
+ auto Enc = getLit64Encoding(static_cast<uint64_t>(Imm), STI, true);
+ return (HasMandatoryLiteral && Enc == 255) ? 254 : Enc;
+ }
+
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
return getLit16IntEncoding(static_cast<uint32_t>(Imm), STI);
@@ -339,6 +345,7 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
+ case AMDGPU::OPERAND_KIMM64:
return MO.getImm();
default:
llvm_unreachable("invalid operand size");
@@ -685,7 +692,10 @@ void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
- if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI)) {
+ bool HasMandatoryLiteral =
+ AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm);
+ if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI,
+ HasMandatoryLiteral)) {
Op = *Enc;
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 7875b2812fe63..a8649970aa825 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -229,6 +229,7 @@ enum OperandType : unsigned {
/// Operand with 32-bit immediate that uses the constant bus.
OPERAND_KIMM32,
OPERAND_KIMM16,
+ OPERAND_KIMM64,
/// Operands with an AccVGPR register or inline constant
OPERAND_REG_INLINE_AC_INT32,
@@ -254,7 +255,7 @@ enum OperandType : unsigned {
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
OPERAND_KIMM_FIRST = OPERAND_KIMM32,
- OPERAND_KIMM_LAST = OPERAND_KIMM16
+ OPERAND_KIMM_LAST = OPERAND_KIMM64
};
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ec7ef66f2c1aa..ca3af3b48a600 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4442,6 +4442,7 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
}
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
+ case AMDGPU::OPERAND_KIMM64:
return false;
case AMDGPU::OPERAND_INPUT_MODS:
case MCOI::OPERAND_IMMEDIATE:
@@ -4867,6 +4868,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
break;
case MCOI::OPERAND_IMMEDIATE:
case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM64:
// Check if this operand is an immediate.
// FrameIndex operands will be replaced by immediates, so they are
// allowed.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index aa0e1fe529d70..5e41f875d980a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1316,6 +1316,12 @@ def KImmFP32 : KImmFPOperand<i32>;
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;
+// 64-bit VALU immediate operand that uses the constant bus.
+def KImmFP64 : KImmFPOperand<i64> {
+ let DecoderMethod = "decodeOperand_KImmFP64";
+ let PrintMethod = "printFP64ImmOperand";
+}
+
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "RegOrImmWithFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index e6840d97e3f3d..6708e0a3f4549 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1603,6 +1603,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ case AMDGPU::OPERAND_KIMM64:
return 8;
case AMDGPU::OPERAND_REG_IMM_INT16:
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 834dc945e64f6..25c6cbc3e1ab5 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -36,6 +36,20 @@ class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
let Inst{63-32} = imm;
}
+class VOP2_MADK64e <bits<6> op, VOPProfile P> : Enc96 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
+ bits<64> imm;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0, 0);
+ let Inst{16-9} = !if(P.HasSrc1, src1, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{95-32} = imm;
+}
+
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
bits<8> vdst;
bits<8> src1;
@@ -375,10 +389,14 @@ class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
}
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
- field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32,
+ !if(!eq(vt.Size, 64), KImmFP64,
+ KImmFP16));
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
- (ins VSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ !if(!eq(vt.Size, 64),
+ (ins VSrc_f64:$src0, VReg_64:$src1, ImmOpType:$imm),
+ (ins VSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)));
field dag InsVOPDX = (ins VSrc_f32:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
let InsVOPDX_immX = (ins VSrc_f32:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immX);
field dag InsVOPDY = (ins VSrc_f32:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);
@@ -404,12 +422,17 @@ def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;
+def VOP_MADAK_F64 : VOP_MADAK <f64>;
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
- field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32,
+ !if(!eq(vt.Size, 64), KImmFP64,
+ KImmFP16));
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1),
- (ins VSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1));
+ !if(!eq(vt.Size, 64),
+ (ins VSrc_f64:$src0, ImmOpType:$imm, VReg_64:$src1),
+ (ins VSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1)));
field dag InsVOPDX = (ins VSrc_f32:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
let InsVOPDX_immX = (ins VSrc_f32:$src0X, ImmOpType:$immX, VGPR_32:$vsrc1X);
field dag InsVOPDY = (ins VSrc_f32:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);
@@ -435,6 +458,7 @@ def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
let Ins32 = (ins VSrcFake16_f16_Lo128:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;
+def VOP_MADMK_F64 : VOP_MADMK <f64>;
// Returns the vreg register class to use for sources of VOP3 instructions for the
// given VT.
@@ -1296,6 +1320,14 @@ let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, CanBeVOPD3X = 0, FixedSize = 1
+let SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1,
+ FixedSize = 1, Size = 12, SchedRW = [Write64Bit] in {
+def V_FMAMK_F64 : VOP2_Pseudo<"v_fmamk_f64", VOP_MADMK_F64, [], "">;
+
+let isCommutable = 1 in
+def V_FMAAK_F64 : VOP2_Pseudo<"v_fmaak_f64", VOP_MADAK_F64, [], "">;
+} // End SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, FixedSize = 1, Size = 12, SchedRW = [Write64Bit]
+
let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst
@@ -1518,6 +1550,14 @@ multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> {
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
+multiclass VOP2Only_Real_MADK64<GFXGen Gen, bits<6> op> {
+ def Gen.Suffix :
+ VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME), Gen>,
+ VOP2_MADK64e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
+ let DecoderNamespace = Gen.DecoderNamespace;
+ }
+}
+
multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName,
string opName = NAME> {
def Gen.Suffix :
@@ -1792,6 +1832,9 @@ let SubtargetPredicate = isGFX12Plus in {
V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX12Plus
+defm V_FMAMK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x23>;
+defm V_FMAAK_F64 : VOP2Only_Real_MADK64<GFX1250Gen, 0x24>;
+...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
d83b4ad to
0886d6a
Compare
shiltian
approved these changes
Jul 11, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.

No description provided.