@@ -336,6 +336,20 @@ class AMDGPUOperand : public MCParsedAsmOperand {
336336 return isRegOrInline (AMDGPU::VS_32RegClassID, MVT::f32 );
337337 }
338338
// Operand predicate: forwards to isRegOrInline with the VS_64 register class
// id and the f64 value type.
339+ bool isRegOrInlineImmWithFP64InputMods () const {
340+ return isRegOrInline (AMDGPU::VS_64RegClassID, MVT::f64 );
341+ }
342+
// \returns the result of isRegClass for the register class id \p RCID.
343+ bool isVRegWithInputMods (unsigned RCID) const { return isRegClass (RCID); }
344+
// Same check as isVRegWithInputMods, fixed to the VGPR_32 register class id.
345+ bool isVRegWithFP32InputMods () const {
346+ return isVRegWithInputMods (AMDGPU::VGPR_32RegClassID);
347+ }
348+
// Same check as isVRegWithInputMods, fixed to the VReg_64 register class id.
349+ bool isVRegWithFP64InputMods () const {
350+ return isVRegWithInputMods (AMDGPU::VReg_64RegClassID);
351+ }
352+
339353 bool isPackedFP16InputMods () const {
340354 return isRegOrImmWithInputMods (AMDGPU::VS_32RegClassID, MVT::v2f16);
341355 }
@@ -531,7 +545,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
531545 return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::i32 );
532546 }
533547
534- bool isVCSrcB64 () const {
548+ bool isVCSrc_b64 () const {
535549 return isRegOrInlineNoMods (AMDGPU::VS_64RegClassID, MVT::i64 );
536550 }
537551
@@ -557,7 +571,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
557571 return isRegOrInlineNoMods (AMDGPU::VS_32RegClassID, MVT::f32 );
558572 }
559573
560- bool isVCSrcF64 () const {
574+ bool isVCSrc_f64 () const {
561575 return isRegOrInlineNoMods (AMDGPU::VS_64RegClassID, MVT::f64 );
562576 }
563577
@@ -605,7 +619,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
605619 return isVCSrc_f32 () || isLiteralImm (MVT::i32 ) || isExpr ();
606620 }
607621
608- bool isVSrc_b64 () const { return isVCSrcF64 () || isLiteralImm (MVT::i64 ); }
622+ bool isVSrc_b64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::i64 ); }
609623
610624 bool isVSrcT_b16 () const { return isVCSrcT_b16 () || isLiteralImm (MVT::i16 ); }
611625
@@ -621,23 +635,19 @@ class AMDGPUOperand : public MCParsedAsmOperand {
621635
622636 bool isVSrc_v2b16 () const { return isVSrc_b16 () || isLiteralImm (MVT::v2i16); }
623637
624- bool isVCSrcV2FP32 () const {
625- return isVCSrcF64 ();
626- }
638+ bool isVCSrcV2FP32 () const { return isVCSrc_f64 (); }
627639
628640 bool isVSrc_v2f32 () const { return isVSrc_f64 () || isLiteralImm (MVT::v2f32); }
629641
630- bool isVCSrcV2INT32 () const {
631- return isVCSrcB64 ();
632- }
642+ bool isVCSrc_v2b32 () const { return isVCSrc_b64 (); }
633643
634644 bool isVSrc_v2b32 () const { return isVSrc_b64 () || isLiteralImm (MVT::v2i32); }
635645
636646 bool isVSrc_f32 () const {
637647 return isVCSrc_f32 () || isLiteralImm (MVT::f32 ) || isExpr ();
638648 }
639649
640- bool isVSrc_f64 () const { return isVCSrcF64 () || isLiteralImm (MVT::f64 ); }
650+ bool isVSrc_f64 () const { return isVCSrc_f64 () || isLiteralImm (MVT::f64 ); }
641651
642652 bool isVSrcT_bf16 () const { return isVCSrcTBF16 () || isLiteralImm (MVT::bf16 ); }
643653
@@ -1531,6 +1541,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
15311541
15321542 bool isGFX12Plus () const { return AMDGPU::isGFX12Plus (getSTI ()); }
15331543
// Forwards to AMDGPU::isGFX1250, passing the result of getSTI().
1544+ bool isGFX1250 () const { return AMDGPU::isGFX1250 (getSTI ()); }
1545+
15341546 bool isGFX10_AEncoding () const { return AMDGPU::isGFX10_AEncoding (getSTI ()); }
15351547
15361548 bool isGFX10_BEncoding () const {
@@ -1782,8 +1794,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
17821794 bool validateSMEMOffset (const MCInst &Inst, const OperandVector &Operands);
17831795 bool validateSOPLiteral (const MCInst &Inst) const ;
17841796 bool validateConstantBusLimitations (const MCInst &Inst, const OperandVector &Operands);
1785- bool validateVOPDRegBankConstraints (const MCInst &Inst,
1786- const OperandVector &Operands);
1797+ std::optional<unsigned > checkVOPDRegBankConstraints (const MCInst &Inst,
1798+ bool AsVOPD3);
1799+ bool validateVOPD (const MCInst &Inst, const OperandVector &Operands);
1800+ bool tryVOPD (const MCInst &Inst);
1801+ bool tryVOPD3 (const MCInst &Inst);
1802+ bool tryAnotherVOPDEncoding (const MCInst &Inst);
1803+
17871804 bool validateIntClampSupported (const MCInst &Inst);
17881805 bool validateMIMGAtomicDMask (const MCInst &Inst);
17891806 bool validateMIMGGatherDMask (const MCInst &Inst);
@@ -3569,6 +3586,13 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
35693586 }
35703587 }
35713588
3589+ // Asm can first try to match VOPD or VOPD3. By failing early here with
3590+ // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3591+ // Checking later during validateInstruction does not give a chance to retry
3592+ // parsing as a different encoding.
3593+ if (tryAnotherVOPDEncoding (Inst))
3594+ return Match_InvalidOperand;
3595+
35723596 return Match_Success;
35733597}
35743598
@@ -3749,8 +3773,10 @@ static OperandIndices getSrcOperandIndices(unsigned Opcode,
37493773
37503774 return {getNamedOperandIdx (Opcode, OpName::src0X),
37513775 getNamedOperandIdx (Opcode, OpName::vsrc1X),
3776+ getNamedOperandIdx (Opcode, OpName::vsrc2X),
37523777 getNamedOperandIdx (Opcode, OpName::src0Y),
37533778 getNamedOperandIdx (Opcode, OpName::vsrc1Y),
3779+ getNamedOperandIdx (Opcode, OpName::vsrc2Y),
37543780 ImmXIdx,
37553781 ImmIdx};
37563782 }
@@ -3880,12 +3906,12 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(
38803906 return false ;
38813907}
38823908
3883- bool AMDGPUAsmParser::validateVOPDRegBankConstraints (
3884- const MCInst &Inst, const OperandVector &Operands ) {
3909+ std::optional< unsigned >
3910+ AMDGPUAsmParser::checkVOPDRegBankConstraints ( const MCInst &Inst, bool AsVOPD3 ) {
38853911
38863912 const unsigned Opcode = Inst.getOpcode ();
38873913 if (!isVOPD (Opcode))
3888- return true ;
3914+ return {} ;
38893915
38903916 const MCRegisterInfo *TRI = getContext ().getRegisterInfo ();
38913917
@@ -3896,24 +3922,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
38963922 : MCRegister ();
38973923 };
38983924
3899- // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3900- bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3925+ // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3926+ // source-cache.
3927+ bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3928+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3929+ Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3930+ bool AllowSameVGPR = isGFX1250 ();
3931+
3932+ if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3933+ for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3934+ int I = getNamedOperandIdx (Opcode, OpName);
3935+ const MCOperand &Op = Inst.getOperand (I);
3936+ if (!Op.isImm ())
3937+ continue ;
3938+ int64_t Imm = Op.getImm ();
3939+ if (!AMDGPU::isInlinableLiteral32 (Imm, hasInv2PiInlineImm ()) &&
3940+ !AMDGPU::isInlinableLiteral64 (Imm, hasInv2PiInlineImm ()))
3941+ return (unsigned )I;
3942+ }
3943+
3944+ for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3945+ OpName::vsrc2Y, OpName::imm}) {
3946+ int I = getNamedOperandIdx (Opcode, OpName);
3947+ if (I == -1 )
3948+ continue ;
3949+ const MCOperand &Op = Inst.getOperand (I);
3950+ if (Op.isImm ())
3951+ return (unsigned )I;
3952+ }
3953+ }
39013954
39023955 const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
3903- auto InvalidCompOprIdx =
3904- InstInfo.getInvalidCompOperandIndex (getVRegIdx, SkipSrc);
3905- if (!InvalidCompOprIdx)
3956+ auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex (
3957+ getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3958+
3959+ return InvalidCompOprIdx;
3960+ }
3961+
3962+ bool AMDGPUAsmParser::validateVOPD (const MCInst &Inst,
3963+ const OperandVector &Operands) {
3964+
3965+ unsigned Opcode = Inst.getOpcode ();
3966+ bool AsVOPD3 = MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3;
3967+
3968+ if (AsVOPD3) {
3969+ for (unsigned I = 0 , E = Operands.size (); I != E; ++I) {
3970+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
3971+ if ((Op.isRegKind () || Op.isImmTy (AMDGPUOperand::ImmTyNone)) &&
3972+ (Op.getModifiers ().getFPModifiersOperand () & SISrcMods::ABS))
3973+ Error (Op.getStartLoc (), " ABS not allowed in VOPD3 instructions" );
3974+ }
3975+ }
3976+
3977+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, AsVOPD3);
3978+ if (!InvalidCompOprIdx.has_value ())
39063979 return true ;
39073980
39083981 auto CompOprIdx = *InvalidCompOprIdx;
3982+ const auto &InstInfo = getVOPDInstInfo (Opcode, &MII);
39093983 auto ParsedIdx =
39103984 std::max (InstInfo[VOPD::X].getIndexInParsedOperands (CompOprIdx),
39113985 InstInfo[VOPD::Y].getIndexInParsedOperands (CompOprIdx));
39123986 assert (ParsedIdx > 0 && ParsedIdx < Operands.size ());
39133987
39143988 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc ();
39153989 if (CompOprIdx == VOPD::Component::DST) {
3916- Error (Loc, " one dst register must be even and the other odd" );
3990+ if (AsVOPD3)
3991+ Error (Loc, " dst registers must be distinct" );
3992+ else
3993+ Error (Loc, " one dst register must be even and the other odd" );
39173994 } else {
39183995 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
39193996 Error (Loc, Twine (" src" ) + Twine (CompSrcIdx) +
@@ -3923,6 +4000,75 @@ bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
39234000 return false ;
39244001}
39254002
4003+ // \returns true if \p Inst does not satisfy VOPD constraints, but can be
4004+ // potentially used as VOPD3 with the same operands.
// The result is also true when both checks fail and the VOPD3 check reports
// the dst operand, so that the VOPD3 diagnostic is the one shown.
// Used directly by tryAnotherVOPDEncoding and, negated, by tryVOPD.
4005+ bool AMDGPUAsmParser::tryVOPD3 (const MCInst &Inst) {
4006+ // First check if it fits VOPD
// An empty optional here means no offending component operand was reported.
4007+ auto InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, false );
4008+ if (!InvalidCompOprIdx.has_value ())
4009+ return false ;
4010+
4011+ // Then if it fits VOPD3
4012+ InvalidCompOprIdx = checkVOPDRegBankConstraints (Inst, true );
4013+ if (InvalidCompOprIdx.has_value ()) {
4014+ // If failed operand is dst it is better to show error about VOPD3
4015+ // instruction as it has more capabilities and error message will be
4016+ // more informative. If the dst is not legal for VOPD3, then it is not
4017+ // legal for VOPD either.
4018+ if (*InvalidCompOprIdx == VOPD::Component::DST)
4019+ return true ;
4020+
4021+ // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4022+ // with a conflict in tied implicit src2 of fmac and no asm operand to
4023+ // point to.
4024+ return false ;
4025+ }
// VOPD constraints failed but the VOPD3 check found nothing to reject.
4026+ return true ;
4027+ }
4028+
4029+ // \returns true if a VOPD3 instruction can be also represented as a shorter
4030+ // VOPD encoding.
// Returns false early when getCanBeVOPD rejects either component opcode, when
// either component is V_CNDMASK_B32_e32, or when any present *_modifiers
// operand is non-zero; otherwise the answer is the negation of tryVOPD3.
4031+ bool AMDGPUAsmParser::tryVOPD (const MCInst &Inst) {
4032+ const unsigned Opcode = Inst.getOpcode ();
4033+ const auto &II = getVOPDInstInfo (Opcode, &MII);
4034+ unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily (getSTI ());
// The X component opcode must be usable as X and the Y component opcode as Y.
4035+ if (!getCanBeVOPD (II[VOPD::X].getOpcode (), EncodingFamily, false ).X ||
4036+ !getCanBeVOPD (II[VOPD::Y].getOpcode (), EncodingFamily, false ).Y )
4037+ return false ;
4038+
4039+ // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4040+ // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4041+ // be parsed as VOPD which does not accept src2.
4042+ if (II[VOPD::X].getOpcode () == AMDGPU::V_CNDMASK_B32_e32 ||
4043+ II[VOPD::Y].getOpcode () == AMDGPU::V_CNDMASK_B32_e32)
4044+ return false ;
4045+
4046+ // If any modifiers are set this cannot be VOPD.
4047+ for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4048+ OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4049+ OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4050+ int I = getNamedOperandIdx (Opcode, OpName);
// -1 means this opcode has no such named operand; nothing to inspect.
4051+ if (I == -1 )
4052+ continue ;
4053+ if (Inst.getOperand (I).getImm ())
4054+ return false ;
4055+ }
4056+
4057+ return !tryVOPD3 (Inst);
4058+ }
4059+
4060+ // VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4061+ // form but switch to VOPD3 otherwise.
// \returns false unless isGFX1250() holds and \p Inst has a VOPD opcode.
// Opcodes flagged SIInstrFlags::VOPD3 are answered by tryVOPD, all others by
// tryVOPD3. checkTargetMatchPredicate maps a true result to
// Match_InvalidOperand.
4062+ bool AMDGPUAsmParser::tryAnotherVOPDEncoding (const MCInst &Inst) {
4063+ const unsigned Opcode = Inst.getOpcode ();
4064+ if (!isGFX1250 () || !isVOPD (Opcode))
4065+ return false ;
4066+
4067+ if (MII.get (Opcode).TSFlags & SIInstrFlags::VOPD3)
4068+ return tryVOPD (Inst);
4069+ return tryVOPD3 (Inst);
4070+ }
4071+
39264072bool AMDGPUAsmParser::validateIntClampSupported (const MCInst &Inst) {
39274073
39284074 const unsigned Opc = Inst.getOpcode ();
@@ -5243,7 +5389,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
52435389 if (!validateConstantBusLimitations (Inst, Operands)) {
52445390 return false ;
52455391 }
5246- if (!validateVOPDRegBankConstraints (Inst, Operands)) {
5392+ if (!validateVOPD (Inst, Operands)) {
52475393 return false ;
52485394 }
52495395 if (!validateIntClampSupported (Inst)) {
@@ -9244,8 +9390,14 @@ ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
92449390
92459391// Create VOPD MCInst operands using parsed assembler operands.
92469392void AMDGPUAsmParser::cvtVOPD (MCInst &Inst, const OperandVector &Operands) {
9393+ const MCInstrDesc &Desc = MII.get (Inst.getOpcode ());
9394+
92479395 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
92489396 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9397+ if (isRegOrImmWithInputMods (Desc, Inst.getNumOperands ())) {
9398+ Op.addRegOrImmWithFPInputModsOperands (Inst, 2 );
9399+ return ;
9400+ }
92499401 if (Op.isReg ()) {
92509402 Op.addRegOperands (Inst, 1 );
92519403 return ;
@@ -9274,6 +9426,17 @@ void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
92749426 if (CInfo.hasSrc2Acc ())
92759427 addOp (CInfo.getIndexOfDstInParsedOperands ());
92769428 }
9429+
9430+ int BitOp3Idx =
9431+ AMDGPU::getNamedOperandIdx (Inst.getOpcode (), AMDGPU::OpName::bitop3);
9432+ if (BitOp3Idx != -1 ) {
9433+ OptionalImmIndexMap OptIdx;
9434+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back ());
9435+ if (Op.isImm ())
9436+ OptIdx[Op.getImmTy ()] = Operands.size () - 1 ;
9437+
9438+ addOptionalImmOperand (Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9439+ }
92779440}
92789441
92799442// ===----------------------------------------------------------------------===//
0 commit comments