@@ -335,6 +335,20 @@ class AMDGPUOperand : public MCParsedAsmOperand {
335335 return isRegOrInline (AMDGPU::VS_32RegClassID, MVT::f32 );
336336 }
337337
338+ bool isRegOrInlineImmWithFP64InputMods () const {
339+ return isRegOrInline (AMDGPU::VS_64RegClassID, MVT::f64 );
340+ }
341+
342+ bool isVRegWithInputMods (unsigned RCID) const { return isRegClass (RCID); }
343+
344+ bool isVRegWithFP32InputMods () const {
345+ return isVRegWithInputMods (AMDGPU::VGPR_32RegClassID);
346+ }
347+
348+ bool isVRegWithFP64InputMods () const {
349+ return isVRegWithInputMods (AMDGPU::VReg_64RegClassID);
350+ }
351+
338352 bool isPackedFP16InputMods () const {
339353 return isRegOrImmWithInputMods (AMDGPU::VS_32RegClassID, MVT::v2f16);
340354 }
@@ -527,7 +541,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
527541 return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::i32 );
528542 }
529543
530- bool isVCSrcB64 () const {
544+ bool isVCSrc_b64 () const {
531545 return isRegOrInlineNoMods (AMDGPU::VS_64RegClassID, MVT::i64 );
532546 }
533547
@@ -553,7 +567,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
553567 return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::f32 );
554568 }
555569
556- bool isVCSrcF64 () const {
570+ bool isVCSrc_f64 () const {
557571 return isRegOrInlineNoMods (AMDGPU::VS_64RegClassID, MVT::f64 );
558572 }
559573
@@ -601,7 +615,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
601615 return isVCSrc_f32 () || isLiteralImm (MVT::i32 ) || isExpr ();
602616 }
603617
604- bool isVSrc_b64 () const { return isVCSrcF64 () || isLiteralImm (MVT::i64 ); }
618+ bool isVSrc_b64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::i64 ); }
605619
606620 bool isVSrcT_b16 () const { return isVCSrcT_b16 () || isLiteralImm (MVT::i16 ); }
607621
@@ -617,23 +631,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
617631
618632 bool isVSrc_v2b16 () const { return isVSrc_b16 () || isLiteralImm (MVT::v2i16); }
619633
620- bool isVCSrcV2FP32 () const {
621- return isVCSrcF64 ();
622- }
634+ bool isVCSrcV2FP32 () const { return isVCSrc_f64 (); }
623635
624636 bool isVSrc_v2f32 () const { return isVSrc_f64 () || isLiteralImm (MVT::v2f32); }
625637
626- bool isVCSrcV2INT32 () const {
627- return isVCSrcB64 ();
628- }
638+ bool isVCSrc_v2b32 () const { return isVCSrc_b64 (); }
629639
630640 bool isVSrc_v2b32 () const { return isVSrc_b64 () || isLiteralImm (MVT::v2i32); }
631641
632642 bool isVSrc_f32 () const {
633643 return isVCSrc_f32 () || isLiteralImm (MVT::f32 ) || isExpr ();
634644 }
635645
636- bool isVSrc_f64 () const { return isVCSrcF64 () || isLiteralImm (MVT::f64 ); }
646+ bool isVSrc_f64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::f64 ); }
637647
638648 bool isVSrcT_bf16 () const { return isVCSrcTBF16 () || isLiteralImm (MVT::bf16 ); }
639649
@@ -1527,6 +1537,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15271537
15281538 bool isGFX12Plus () const { return AMDGPU::isGFX12Plus (getSTI ()); }
15291539
1540+ bool isGFX1250 () const { return AMDGPU::isGFX1250 (getSTI ()); }
1541+
15301542 bool isGFX10_AEncoding () const { return AMDGPU::isGFX10_AEncoding (getSTI ()); }
15311543
15321544 bool isGFX10_BEncoding () const {
@@ -1774,8 +1786,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
17741786 bool validateSMEMOffset (const MCInst &Inst, const OperandVector &Operands);
17751787 bool validateSOPLiteral (const MCInst &Inst) const ;
17761788 bool validateConstantBusLimitations (const MCInst &Inst, const OperandVector &Operands);
1777- bool validateVOPDRegBankConstraints (const MCInst &Inst,
1778- const OperandVector &Operands);
1789+ std::optional<unsigned > checkVOPDRegBankConstraints (const MCInst &Inst,
1790+ bool AsVOPD3);
1791+ bool validateVOPD (const MCInst &Inst, const OperandVector &Operands);
1792+ bool tryVOPD (const MCInst &Inst);
1793+ bool tryVOPD3 (const MCInst &Inst);
1794+ bool tryAnotherVOPDEncoding (const MCInst &Inst);
1795+
17791796 bool validateIntClampSupported (const MCInst &Inst);
17801797 bool validateMIMGAtomicDMask (const MCInst &Inst);
17811798 bool validateMIMGGatherDMask (const MCInst &Inst);
@@ -3505,6 +3522,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
35053522 }
35063523 }
35073524
3525+ // Asm can first try to match VOPD or VOPD3. By failing early here with
3526+ // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3527+ // Checking later during validateInstruction does not give a chance to retry
3528+ // parsing as a different encoding.
3529+ if (tryAnotherVOPDEncoding (Inst))
3530+ return Match_InvalidOperand;
3531+
35083532 return Match_Success;
35093533}
35103534
@@ -3685,8 +3709,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
36853709
36863710 return {getNamedOperandIdx (Opcode, OpName::src0X),
36873711 getNamedOperandIdx (Opcode, OpName::vsrc1X),
3712+ getNamedOperandIdx (Opcode, OpName::vsrc2X),
36883713 getNamedOperandIdx (Opcode, OpName::src0Y),
36893714 getNamedOperandIdx (Opcode, OpName::vsrc1Y),
3715+ getNamedOperandIdx (Opcode, OpName::vsrc2Y),
36903716 ImmXIdx,
36913717 ImmIdx};
36923718 }
@@ -3816,12 +3842,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
38163842 return false ;
38173843}
38183844
3819- bool AMDGPUAsmParser::validateVOPDRegBankConstraints (
3820- const MCInst &Inst, const OperandVector &Operands ) {
3845+ std::optional< unsigned >
3846+ AMDGPUAsmParser::checkVOPDRegBankConstraints ( const MCInst &Inst, bool AsVOPD3 ) {
38213847
38223848 const unsigned Opcode = Inst.getOpcode ();
38233849 if (!isVOPD (Opcode))
3824- return true ;
3850+ return {} ;
38253851
38263852 const MCRegisterInfo *TRI = getContext ().getRegisterInfo ();
38273853
@@ -3833,23 +3859,73 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38333859 };
38343860
38353861 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3836- bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3862+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3863+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3864+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3865+ bool AllowSameVGPR = isGFX1250 ();
3866+
3867+ if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3868+ for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3869+ int I = getNamedOperandIdx (Opcode, OpName);
3870+ const MCOperand &Op = Inst.getOperand (I);
3871+ if (!Op.isImm ())
3872+ continue ;
3873+ int64_t Imm = Op.getImm ();
3874+ if (!AMDGPU::isInlinableLiteral32 (Imm, hasInv2PiInlineImm ()) &&
3875+ !AMDGPU::isInlinableLiteral64 (Imm, hasInv2PiInlineImm ()))
3876+ return I;
3877+ }
3878+
3879+ for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3880+ OpName::vsrc2Y, OpName::imm}) {
3881+ int I = getNamedOperandIdx (Opcode, OpName);
3882+ if (I == -1 )
3883+ continue ;
3884+ const MCOperand &Op = Inst.getOperand (I);
3885+ if (Op.isImm ())
3886+ return I;
3887+ }
3888+ }
38373889
38383890 const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
3839- auto InvalidCompOprIdx =
3840- InstInfo.getInvalidCompOperandIndex (getVRegIdx, SkipSrc);
3841- if (!InvalidCompOprIdx)
3891+ auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex (
3892+ getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3893+
3894+ return InvalidCompOprIdx;
3895+ }
3896+
3897+ bool AMDGPUAsmParser::validateVOPD (const MCInst &Inst,
3898+ const OperandVector &Operands) {
3899+
3900+ unsigned Opcode = Inst.getOpcode ();
3901+ bool AsVOPD3 = MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3;
3902+
3903+ if (AsVOPD3) {
3904+ for (unsigned I = 0 , E = Operands.size (); I != E; ++I) {
3905+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3906+ if ((Op.isRegKind () || Op.isImmTy (AMDGPUOperand::ImmTyNone)) &&
3907+ (Op.getModifiers ().getFPModifiersOperand () & SISrcMods::ABS))
3908+ Error (Op.getStartLoc (), " ABS not allowed in VOPD3 instructions" );
3909+ }
3910+ }
3911+
3912+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, AsVOPD3);
3913+ if (!InvalidCompOprIdx.has_value ())
38423914 return true ;
38433915
38443916 auto CompOprIdx = *InvalidCompOprIdx;
3917+ const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
38453918 auto ParsedIdx =
38463919 std::max (InstInfo[VOPD::X].getIndexInParsedOperands (CompOprIdx),
38473920 InstInfo[VOPD::Y].getIndexInParsedOperands (CompOprIdx));
38483921 assert (ParsedIdx > 0 && ParsedIdx < Operands.size ());
38493922
38503923 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc ();
38513924 if (CompOprIdx == VOPD::Component::DST) {
3852- Error (Loc, " one dst register must be even and the other odd" );
3925+ if (AsVOPD3)
3926+ Error (Loc, " dst registers must be distinct" );
3927+ else
3928+ Error (Loc, " one dst register must be even and the other odd" );
38533929 } else {
38543930 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
38553931 Error (Loc, Twine (" src" ) + Twine (CompSrcIdx) +
@@ -3859,6 +3935,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38593935 return false ;
38603936}
38613937
3938+ // \returns true if \p Inst does not satisfy VOPD constraints, but can be
3939+ // potentially used as VOPD3 with the same operands.
3940+ bool AMDGPUAsmParser::tryVOPD3 (const MCInst &Inst) {
3941+ // First check if it fits VOPD
3942+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, false );
3943+ if (!InvalidCompOprIdx.has_value ())
3944+ return false ;
3945+
3946+ // Then if it fits VOPD3
3947+ InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, true );
3948+ if (InvalidCompOprIdx.has_value ()) {
3949+ // If failed operand is dst it is better to show error about VOPD3
3950+ // instruction as it has more capabilities and error message will be
3951+ // more informative. If the dst is not legal for VOPD3, then it is not
3952+ // legal for VOPD either.
3953+ if (*InvalidCompOprIdx == VOPD::Component::DST)
3954+ return true ;
3955+
3956+ // Otherwise prefer VOPD as we may find ourselves in an awkward situation
3957+ // with a conflict in tied implicit src2 of fmac and no asm operand
3958+ // to point to.
3959+ return false ;
3960+ }
3961+ return true ;
3962+ }
3963+
3964+ // \returns true if a VOPD3 instruction can also be represented as a shorter
3965+ // VOPD encoding.
3966+ bool AMDGPUAsmParser::tryVOPD (const MCInst &Inst) {
3967+ const unsigned Opcode = Inst.getOpcode ();
3968+ const auto &II = getVOPDInstInfo (Opcode, &MII);
3969+ unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily (getSTI ());
3970+ if (!getCanBeVOPD (II[VOPD::X].getOpcode (), EncodingFamily, false ).X ||
3971+ !getCanBeVOPD (II[VOPD::Y].getOpcode (), EncodingFamily, false ).Y )
3972+ return false ;
3973+
3974+ // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
3975+ // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
3976+ // be parsed as VOPD which does not accept src2.
3977+ if (II[VOPD::X].getOpcode () == AMDGPU::V_CNDMASK_B32_e32 ||
3978+ II[VOPD::Y].getOpcode () == AMDGPU::V_CNDMASK_B32_e32)
3979+ return false ;
3980+
3981+ // If any modifiers are set this cannot be VOPD.
3982+ for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
3983+ OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
3984+ OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
3985+ int I = getNamedOperandIdx (Opcode, OpName);
3986+ if (I == -1 )
3987+ continue ;
3988+ if (Inst.getOperand (I).getImm ())
3989+ return false ;
3990+ }
3991+
3992+ return !tryVOPD3 (Inst);
3993+ }
3994+
3995+ // VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
3996+ // form but switch to VOPD3 otherwise.
3997+ bool AMDGPUAsmParser::tryAnotherVOPDEncoding (const MCInst &Inst) {
3998+ const unsigned Opcode = Inst.getOpcode ();
3999+ if (!isGFX1250 () || !isVOPD (Opcode))
4000+ return false ;
4001+
4002+ if (MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3)
4003+ return tryVOPD (Inst);
4004+ return tryVOPD3 (Inst);
4005+ }
4006+
38624007bool AMDGPUAsmParser::validateIntClampSupported (const MCInst &Inst) {
38634008
38644009 const unsigned Opc = Inst.getOpcode ();
@@ -5179,7 +5324,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
51795324 if (!validateConstantBusLimitations (Inst, Operands)) {
51805325 return false ;
51815326 }
5182- if (!validateVOPDRegBankConstraints (Inst, Operands)) {
5327+ if (!validateVOPD (Inst, Operands)) {
51835328 return false ;
51845329 }
51855330 if (!validateIntClampSupported (Inst)) {
@@ -9180,8 +9325,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
91809325
91819326// Create VOPD MCInst operands using parsed assembler operands.
91829327void AMDGPUAsmParser::cvtVOPD (MCInst &Inst, const OperandVector &Operands) {
9328+ const MCInstrDesc &Desc = MII.get (Inst.getOpcode ());
9329+
91839330 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
91849331 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9332+ if (isRegOrImmWithInputMods (Desc, Inst.getNumOperands ())) {
9333+ Op.addRegOrImmWithFPInputModsOperands (Inst, 2 );
9334+ return ;
9335+ }
91859336 if (Op.isReg ()) {
91869337 Op.addRegOperands (Inst, 1 );
91879338 return ;
@@ -9210,6 +9361,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
92109361 if (CInfo.hasSrc2Acc ())
92119362 addOp (CInfo.getIndexOfDstInParsedOperands ());
92129363 }
9364+
9365+ int BitOp3Idx =
9366+ AMDGPU::getNamedOperandIdx (Inst.getOpcode (), AMDGPU::OpName::bitop3);
9367+ if (BitOp3Idx != -1 ) {
9368+ OptionalImmIndexMap OptIdx;
9369+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back ());
9370+ if (Op.isImm ())
9371+ OptIdx[Op.getImmTy ()] = Operands.size () - 1 ;
9372+
9373+ addOptionalImmOperand (Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9374+ }
92139375}
92149376
92159377// ===----------------------------------------------------------------------===//