diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index ca98b80787fb4..a87f9f274a4d3 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1423,7 +1423,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// \returns true if the target has packed f32 instructions that only read 32 /// bits from a scalar operand (SGPR or literal) and replicates the bits to /// both channels. - bool hasPKF32InstsReplicatingLow32BitsOfScalarInput() const { + bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const { return getGeneration() == GFX12 && GFX1250Insts; } diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 289bf1a563ffc..2df9267cde1f2 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -766,29 +766,21 @@ static void appendFoldCandidate(SmallVectorImpl &FoldList, FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp)); } -// Returns true if the instruction is a packed f32 instruction that only reads -// 32 bits from a scalar operand (SGPR or literal) and replicates the bits to -// both channels. -static bool -isPKF32InstrReplicatingLow32BitsOfScalarInput(const GCNSubtarget *ST, - MachineInstr *MI) { - if (!ST->hasPKF32InstsReplicatingLow32BitsOfScalarInput()) +// Returns true if operand OpNo of MI is a packed F32 source operand for which +// the hardware reads only 32 bits of a scalar input (SGPR or literal) and +// replicates them to both channels.
+static bool isPKF32InstrReplicatesLower32BitsOfScalarOperand( + const GCNSubtarget *ST, MachineInstr *MI, unsigned OpNo) { + if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput()) return false; - switch (MI->getOpcode()) { - case AMDGPU::V_PK_ADD_F32: - case AMDGPU::V_PK_MUL_F32: - case AMDGPU::V_PK_FMA_F32: - return true; - default: - return false; - } - llvm_unreachable("unknown instruction"); + const MCOperandInfo &OpDesc = MI->getDesc().operands()[OpNo]; + return OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32; } // Packed FP32 instructions only read 32 bits from a scalar operand (SGPR or // literal) and replicates the bits to both channels. Therefore, if the hi and // lo are not same, we can't fold it. -static bool checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput( +static bool checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand( const FoldableDef &OpToFold) { assert(OpToFold.isImm() && "Expected immediate operand"); uint64_t ImmVal = OpToFold.getEffectiveImmVal().value(); @@ -953,8 +945,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList( // Special case for PK_F32 instructions if we are trying to fold an imm to // src0 or src1. 
if (OpToFold.isImm() && - isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, MI) && - !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold)) + isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, MI, OpNo) && + !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold)) return false; appendFoldCandidate(FoldList, MI, OpNo, OpToFold); @@ -1172,8 +1164,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm( return false; if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) { - if (isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, UseMI) && - !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold)) + if (isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, UseMI, UseOpIdx) && + !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold)) return false; appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold); return true;