@@ -766,29 +766,21 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
766766 FoldCandidate (MI, OpNo, FoldOp, Commuted, ShrinkOp));
767767}
768768
769- // Returns true if the instruction is a packed f32 instruction that only reads
770- // 32 bits from a scalar operand (SGPR or literal) and replicates the bits to
771- // both channels.
772- static bool
773- isPKF32InstrReplicatingLow32BitsOfScalarInput (const GCNSubtarget *ST,
774- MachineInstr *MI) {
775- if (!ST->hasPKF32InstsReplicatingLow32BitsOfScalarInput ())
769+ // Returns true if the instruction is a packed F32 instruction that reads only
770+ // 32 bits from the given scalar operand and replicates those bits to both
771+ // channels.
772+ static bool isPKF32InstrReplicatesLower32BitsOfScalarOperand (
773+ const GCNSubtarget *ST, MachineInstr *MI, unsigned OpNo) {
774+ if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput ())
776775 return false ;
777- switch (MI->getOpcode ()) {
778- case AMDGPU::V_PK_ADD_F32:
779- case AMDGPU::V_PK_MUL_F32:
780- case AMDGPU::V_PK_FMA_F32:
781- return true ;
782- default :
783- return false ;
784- }
785- llvm_unreachable (" unknown instruction" );
776+ const MCOperandInfo &OpDesc = MI->getDesc ().operands ()[OpNo];
777+ return OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
786778}
787779
788780// Packed FP32 instructions only read 32 bits from a scalar operand (SGPR or
789781// literal) and replicate the bits to both channels. Therefore, if the hi and
790782// lo are not the same, we can't fold it.
791- static bool checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput (
783+ static bool checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand (
792784 const FoldableDef &OpToFold) {
793785 assert (OpToFold.isImm () && " Expected immediate operand" );
794786 uint64_t ImmVal = OpToFold.getEffectiveImmVal ().value ();
@@ -953,8 +945,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
953945 // Special case for PK_F32 instructions if we are trying to fold an imm to
954946 // src0 or src1.
955947 if (OpToFold.isImm () &&
956- isPKF32InstrReplicatingLow32BitsOfScalarInput (ST, MI) &&
957- !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput (OpToFold))
948+ isPKF32InstrReplicatesLower32BitsOfScalarOperand (ST, MI, OpNo ) &&
949+ !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand (OpToFold))
958950 return false ;
959951
960952 appendFoldCandidate (FoldList, MI, OpNo, OpToFold);
@@ -1172,8 +1164,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
11721164 return false ;
11731165
11741166 if (OpToFold.isImm () && OpToFold.isOperandLegal (*TII, *UseMI, UseOpIdx)) {
1175- if (isPKF32InstrReplicatingLow32BitsOfScalarInput (ST, UseMI) &&
1176- !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput (OpToFold))
1167+ if (isPKF32InstrReplicatesLower32BitsOfScalarOperand (ST, UseMI, UseOpIdx ) &&
1168+ !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand (OpToFold))
11771169 return false ;
11781170 appendFoldCandidate (FoldList, UseMI, UseOpIdx, OpToFold);
11791171 return true ;
0 commit comments