@@ -335,6 +335,20 @@ class AMDGPUOperand : public MCParsedAsmOperand {
335335 return isRegOrInline (AMDGPU::VS_32RegClassID, MVT::f32 );
336336 }
337337
338+ bool isRegOrInlineImmWithFP64InputMods () const {
339+ return isRegOrInline (AMDGPU::VS_64RegClassID, MVT::f64 );
340+ }
341+
342+ bool isVRegWithInputMods (unsigned RCID) const { return isRegClass (RCID); }
343+
344+ bool isVRegWithFP32InputMods () const {
345+ return isVRegWithInputMods (AMDGPU::VGPR_32RegClassID);
346+ }
347+
348+ bool isVRegWithFP64InputMods () const {
349+ return isVRegWithInputMods (AMDGPU::VReg_64RegClassID);
350+ }
351+
338352 bool isPackedFP16InputMods () const {
339353 return isRegOrImmWithInputMods (AMDGPU::VS_32RegClassID, MVT::v2f16);
340354 }
@@ -527,7 +541,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
527541 return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::i32 );
528542 }
529543
530- bool isVCSrcB64 () const {
544+ bool isVCSrc_b64 () const {
531545 return isRegOrInlineNoMods (AMDGPU::VS_64RegClassID, MVT::i64 );
532546 }
533547
@@ -553,7 +567,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
553567 return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::f32 );
554568 }
555569
556- bool isVCSrcF64 () const {
570+ bool isVCSrc_f64 () const {
557571 return isRegOrInlineNoMods (AMDGPU::VS_64RegClassID, MVT::f64 );
558572 }
559573
@@ -601,7 +615,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
601615 return isVCSrc_f32 () || isLiteralImm (MVT::i32 ) || isExpr ();
602616 }
603617
604- bool isVSrc_b64 () const { return isVCSrcF64 () || isLiteralImm (MVT::i64 ); }
618+ bool isVSrc_b64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::i64 ); }
605619
606620 bool isVSrcT_b16 () const { return isVCSrcT_b16 () || isLiteralImm (MVT::i16 ); }
607621
@@ -617,23 +631,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
617631
618632 bool isVSrc_v2b16 () const { return isVSrc_b16 () || isLiteralImm (MVT::v2i16); }
619633
620- bool isVCSrcV2FP32 () const {
621- return isVCSrcF64 ();
622- }
634+ bool isVCSrcV2FP32 () const { return isVCSrc_f64 (); }
623635
624636 bool isVSrc_v2f32 () const { return isVSrc_f64 () || isLiteralImm (MVT::v2f32); }
625637
626- bool isVCSrcV2INT32 () const {
627- return isVCSrcB64 ();
628- }
638+ bool isVCSrc_v2b32 () const { return isVCSrc_b64 (); }
629639
630640 bool isVSrc_v2b32 () const { return isVSrc_b64 () || isLiteralImm (MVT::v2i32); }
631641
632642 bool isVSrc_f32 () const {
633643 return isVCSrc_f32 () || isLiteralImm (MVT::f32 ) || isExpr ();
634644 }
635645
636- bool isVSrc_f64 () const { return isVCSrcF64 () || isLiteralImm (MVT::f64 ); }
646+ bool isVSrc_f64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::f64 ); }
637647
638648 bool isVSrcT_bf16 () const { return isVCSrcTBF16 () || isLiteralImm (MVT::bf16 ); }
639649
@@ -1527,6 +1537,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15271537
15281538 bool isGFX12Plus () const { return AMDGPU::isGFX12Plus (getSTI ()); }
15291539
1540+ bool isGFX1250 () const { return AMDGPU::isGFX1250 (getSTI ()); }
1541+
15301542 bool isGFX10_AEncoding () const { return AMDGPU::isGFX10_AEncoding (getSTI ()); }
15311543
15321544 bool isGFX10_BEncoding () const {
@@ -1774,8 +1786,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
17741786 bool validateSMEMOffset (const MCInst &Inst, const OperandVector &Operands);
17751787 bool validateSOPLiteral (const MCInst &Inst) const ;
17761788 bool validateConstantBusLimitations (const MCInst &Inst, const OperandVector &Operands);
1777- bool validateVOPDRegBankConstraints (const MCInst &Inst,
1778- const OperandVector &Operands);
1789+ std::optional<unsigned > checkVOPDRegBankConstraints (const MCInst &Inst,
1790+ bool AsVOPD3);
1791+ bool validateVOPD (const MCInst &Inst, const OperandVector &Operands);
1792+ bool tryVOPD (const MCInst &Inst);
1793+ bool tryVOPD3 (const MCInst &Inst);
1794+ bool tryAnotherVOPDEncoding (const MCInst &Inst);
1795+
17791796 bool validateIntClampSupported (const MCInst &Inst);
17801797 bool validateMIMGAtomicDMask (const MCInst &Inst);
17811798 bool validateMIMGGatherDMask (const MCInst &Inst);
@@ -3505,6 +3522,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
35053522 }
35063523 }
35073524
3525+ // Asm can first try to match VOPD or VOPD3. By failing early here with
3526+ // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3527+ // Checking later during validateInstruction does not give a chance to retry
3528+ // parsing as a different encoding.
3529+ if (tryAnotherVOPDEncoding (Inst))
3530+ return Match_InvalidOperand;
3531+
35083532 return Match_Success;
35093533}
35103534
@@ -3685,8 +3709,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
36853709
36863710 return {getNamedOperandIdx (Opcode, OpName::src0X),
36873711 getNamedOperandIdx (Opcode, OpName::vsrc1X),
3712+ getNamedOperandIdx (Opcode, OpName::vsrc2X),
36883713 getNamedOperandIdx (Opcode, OpName::src0Y),
36893714 getNamedOperandIdx (Opcode, OpName::vsrc1Y),
3715+ getNamedOperandIdx (Opcode, OpName::vsrc2Y),
36903716 ImmXIdx,
36913717 ImmIdx};
36923718 }
@@ -3816,12 +3842,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
38163842 return false ;
38173843}
38183844
3819- bool AMDGPUAsmParser::validateVOPDRegBankConstraints (
3820- const MCInst &Inst, const OperandVector &Operands ) {
3845+ std::optional< unsigned >
3846+ AMDGPUAsmParser::checkVOPDRegBankConstraints ( const MCInst &Inst, bool AsVOPD3 ) {
38213847
38223848 const unsigned Opcode = Inst.getOpcode ();
38233849 if (!isVOPD (Opcode))
3824- return true ;
3850+ return {} ;
38253851
38263852 const MCRegisterInfo *TRI = getContext ().getRegisterInfo ();
38273853
@@ -3832,24 +3858,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38323858 : MCRegister ();
38333859 };
38343860
3835- // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3836- bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3861+ // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3862+ // source-cache.
3863+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3864+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3865+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3866+ bool AllowSameVGPR = isGFX1250 ();
3867+
3868+ if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3869+ for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3870+ int I = getNamedOperandIdx (Opcode, OpName);
3871+ const MCOperand &Op = Inst.getOperand (I);
3872+ if (!Op.isImm ())
3873+ continue ;
3874+ int64_t Imm = Op.getImm ();
3875+ if (!AMDGPU::isInlinableLiteral32 (Imm, hasInv2PiInlineImm ()) &&
3876+ !AMDGPU::isInlinableLiteral64 (Imm, hasInv2PiInlineImm ()))
3877+ return (unsigned )I;
3878+ }
3879+
3880+ for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3881+ OpName::vsrc2Y, OpName::imm}) {
3882+ int I = getNamedOperandIdx (Opcode, OpName);
3883+ if (I == -1 )
3884+ continue ;
3885+ const MCOperand &Op = Inst.getOperand (I);
3886+ if (Op.isImm ())
3887+ return (unsigned )I;
3888+ }
3889+ }
38373890
38383891 const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
3839- auto InvalidCompOprIdx =
3840- InstInfo.getInvalidCompOperandIndex (getVRegIdx, SkipSrc);
3841- if (!InvalidCompOprIdx)
3892+ auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex (
3893+ getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3894+
3895+ return InvalidCompOprIdx;
3896+ }
3897+
3898+ bool AMDGPUAsmParser::validateVOPD (const MCInst &Inst,
3899+ const OperandVector &Operands) {
3900+
3901+ unsigned Opcode = Inst.getOpcode ();
3902+ bool AsVOPD3 = MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3;
3903+
3904+ if (AsVOPD3) {
3905+ for (unsigned I = 0 , E = Operands.size (); I != E; ++I) {
3906+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3907+ if ((Op.isRegKind () || Op.isImmTy (AMDGPUOperand::ImmTyNone)) &&
3908+ (Op.getModifiers ().getFPModifiersOperand () & SISrcMods::ABS))
3909+ Error (Op.getStartLoc (), " ABS not allowed in VOPD3 instructions" );
3910+ }
3911+ }
3912+
3913+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, AsVOPD3);
3914+ if (!InvalidCompOprIdx.has_value ())
38423915 return true ;
38433916
38443917 auto CompOprIdx = *InvalidCompOprIdx;
3918+ const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
38453919 auto ParsedIdx =
38463920 std::max (InstInfo[VOPD::X].getIndexInParsedOperands (CompOprIdx),
38473921 InstInfo[VOPD::Y].getIndexInParsedOperands (CompOprIdx));
38483922 assert (ParsedIdx > 0 && ParsedIdx < Operands.size ());
38493923
38503924 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc ();
38513925 if (CompOprIdx == VOPD::Component::DST) {
3852- Error (Loc, " one dst register must be even and the other odd" );
3926+ if (AsVOPD3)
3927+ Error (Loc, " dst registers must be distinct" );
3928+ else
3929+ Error (Loc, " one dst register must be even and the other odd" );
38533930 } else {
38543931 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
38553932 Error (Loc, Twine (" src" ) + Twine (CompSrcIdx) +
@@ -3859,6 +3936,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38593936 return false ;
38603937}
38613938
3939+ // \returns true if \p Inst does not satisfy VOPD constraints, but can be
3940+ // potentially used as VOPD3 with the same operands.
3941+ bool AMDGPUAsmParser::tryVOPD3 (const MCInst &Inst) {
3942+ // First check if it fits VOPD
3943+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, false );
3944+ if (!InvalidCompOprIdx.has_value ())
3945+ return false ;
3946+
3947+ // Then if it fits VOPD3
3948+ InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, true );
3949+ if (InvalidCompOprIdx.has_value ()) {
3950+ // If the failed operand is the dst, it is better to report the error for
3951+ // the VOPD3 instruction, as it has more capabilities and the error message
3952+ // will be more informative. If the dst is not legal for VOPD3, then it is
3953+ // not legal for VOPD either.
3954+ if (*InvalidCompOprIdx == VOPD::Component::DST)
3955+ return true ;
3956+
3957+ // Otherwise prefer VOPD as we may find ourselves in an awkward situation
3958+ // with a conflict in tied implicit src2 of fmac and no asm operand to
3959+ // to point to.
3960+ return false ;
3961+ }
3962+ return true ;
3963+ }
3964+
3965+ // \returns true if a VOPD3 instruction can also be represented as a shorter
3966+ // VOPD encoding.
3967+ bool AMDGPUAsmParser::tryVOPD (const MCInst &Inst) {
3968+ const unsigned Opcode = Inst.getOpcode ();
3969+ const auto &II = getVOPDInstInfo (Opcode, &MII);
3970+ unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily (getSTI ());
3971+ if (!getCanBeVOPD (II[VOPD::X].getOpcode (), EncodingFamily, false ).X ||
3972+ !getCanBeVOPD (II[VOPD::Y].getOpcode (), EncodingFamily, false ).Y )
3973+ return false ;
3974+
3975+ // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
3976+ // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
3977+ // be parsed as VOPD which does not accept src2.
3978+ if (II[VOPD::X].getOpcode () == AMDGPU::V_CNDMASK_B32_e32 ||
3979+ II[VOPD::Y].getOpcode () == AMDGPU::V_CNDMASK_B32_e32)
3980+ return false ;
3981+
3982+ // If any modifiers are set this cannot be VOPD.
3983+ for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
3984+ OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
3985+ OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
3986+ int I = getNamedOperandIdx (Opcode, OpName);
3987+ if (I == -1 )
3988+ continue ;
3989+ if (Inst.getOperand (I).getImm ())
3990+ return false ;
3991+ }
3992+
3993+ return !tryVOPD3 (Inst);
3994+ }
3995+
3996+ // VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
3997+ // form but switch to VOPD3 otherwise.
3998+ bool AMDGPUAsmParser::tryAnotherVOPDEncoding (const MCInst &Inst) {
3999+ const unsigned Opcode = Inst.getOpcode ();
4000+ if (!isGFX1250 () || !isVOPD (Opcode))
4001+ return false ;
4002+
4003+ if (MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3)
4004+ return tryVOPD (Inst);
4005+ return tryVOPD3 (Inst);
4006+ }
4007+
38624008bool AMDGPUAsmParser::validateIntClampSupported (const MCInst &Inst) {
38634009
38644010 const unsigned Opc = Inst.getOpcode ();
@@ -5179,7 +5325,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
51795325 if (!validateConstantBusLimitations (Inst, Operands)) {
51805326 return false ;
51815327 }
5182- if (!validateVOPDRegBankConstraints (Inst, Operands)) {
5328+ if (!validateVOPD (Inst, Operands)) {
51835329 return false ;
51845330 }
51855331 if (!validateIntClampSupported (Inst)) {
@@ -9180,8 +9326,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
91809326
91819327// Create VOPD MCInst operands using parsed assembler operands.
91829328void AMDGPUAsmParser::cvtVOPD (MCInst &Inst, const OperandVector &Operands) {
9329+ const MCInstrDesc &Desc = MII.get (Inst.getOpcode ());
9330+
91839331 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
91849332 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9333+ if (isRegOrImmWithInputMods (Desc, Inst.getNumOperands ())) {
9334+ Op.addRegOrImmWithFPInputModsOperands (Inst, 2 );
9335+ return ;
9336+ }
91859337 if (Op.isReg ()) {
91869338 Op.addRegOperands (Inst, 1 );
91879339 return ;
@@ -9210,6 +9362,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
92109362 if (CInfo.hasSrc2Acc ())
92119363 addOp (CInfo.getIndexOfDstInParsedOperands ());
92129364 }
9365+
9366+ int BitOp3Idx =
9367+ AMDGPU::getNamedOperandIdx (Inst.getOpcode (), AMDGPU::OpName::bitop3);
9368+ if (BitOp3Idx != -1 ) {
9369+ OptionalImmIndexMap OptIdx;
9370+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back ());
9371+ if (Op.isImm ())
9372+ OptIdx[Op.getImmTy ()] = Operands.size () - 1 ;
9373+
9374+ addOptionalImmOperand (Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9375+ }
92139376}
92149377
92159378// ===----------------------------------------------------------------------===//
0 commit comments