llvm/lib/Target/AMDGPU/GCNSubtarget.h (2 changes: 1 addition & 1 deletion)

@@ -1423,7 +1423,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// \returns true if the target has packed f32 instructions that only read 32
   /// bits from a scalar operand (SGPR or literal) and replicates the bits to
   /// both channels.
-  bool hasPKF32InstsReplicatingLow32BitsOfScalarInput() const {
+  bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const {
     return getGeneration() == GFX12 && GFX1250Insts;
   }
 
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (34 changes: 13 additions & 21 deletions)

@@ -766,29 +766,21 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
       FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
 }
 
-// Returns true if the instruction is a packed f32 instruction that only reads
-// 32 bits from a scalar operand (SGPR or literal) and replicates the bits to
-// both channels.
-static bool
-isPKF32InstrReplicatingLow32BitsOfScalarInput(const GCNSubtarget *ST,
-                                              MachineInstr *MI) {
-  if (!ST->hasPKF32InstsReplicatingLow32BitsOfScalarInput())
+// Returns true if the instruction is a packed F32 instruction and the
+// corresponding scalar operand reads 32 bits and replicates the bits to both
+// channels.
+static bool isPKF32InstrReplicatesLower32BitsOfScalarOperand(
+    const GCNSubtarget *ST, MachineInstr *MI, unsigned OpNo) {
+  if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput())
     return false;
-  switch (MI->getOpcode()) {
-  case AMDGPU::V_PK_ADD_F32:
-  case AMDGPU::V_PK_MUL_F32:
-  case AMDGPU::V_PK_FMA_F32:
-    return true;
-  default:
-    return false;
-  }
-  llvm_unreachable("unknown instruction");
+  const MCOperandInfo &OpDesc = MI->getDesc().operands()[OpNo];
+  return OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
 }
 
 // Packed FP32 instructions only read 32 bits from a scalar operand (SGPR or
 // literal) and replicates the bits to both channels. Therefore, if the hi and
 // lo are not same, we can't fold it.
-static bool checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(
+static bool checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(
     const FoldableDef &OpToFold) {
   assert(OpToFold.isImm() && "Expected immediate operand");
   uint64_t ImmVal = OpToFold.getEffectiveImmVal().value();
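The remainder of checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand is collapsed in this diff, so the comparison itself is not visible here. Below is a minimal sketch of the check described by the comment above (the high and low 32-bit halves of the packed literal must match), written as a hypothetical standalone helper using LLVM's Lo_32/Hi_32 utilities; it is an assumption about the collapsed body, not the patch's actual code.

// Hedged sketch, not the patch's code: isFoldablePKF32Imm is a hypothetical
// name introduced only for illustration.
#include "llvm/Support/MathExtras.h"
#include <cstdint>

static bool isFoldablePKF32Imm(uint64_t ImmVal) {
  // Hardware replicates the low 32 bits of a scalar operand to both channels,
  // so a 64-bit packed literal is only safe to fold when hi == lo.
  return llvm::Lo_32(ImmVal) == llvm::Hi_32(ImmVal);
}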
@@ -953,8 +945,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
   // Special case for PK_F32 instructions if we are trying to fold an imm to
   // src0 or src1.
   if (OpToFold.isImm() &&
-      isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, MI) &&
-      !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold))
+      isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, MI, OpNo) &&
+      !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold))
     return false;
 
   appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
@@ -1172,8 +1164,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
     return false;
 
   if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
-    if (isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, UseMI) &&
-        !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold))
+    if (isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, UseMI, UseOpIdx) &&
+        !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold))
       return false;
     appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
     return true;
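For a concrete feel of what the hi/lo restriction means for these folds, here is a standalone illustration (not code from the patch) with two example packed literals; the encoding assumption that the low 32 bits hold the first channel and the isFoldablePKF32Imm helper name are both hypothetical.

// Standalone illustration; assumes low 32 bits = first channel of the pair.
#include <cstdint>
#include <cstdio>

static bool isFoldablePKF32Imm(uint64_t Imm) {
  // Same hi == lo check as the sketch above.
  return static_cast<uint32_t>(Imm) == static_cast<uint32_t>(Imm >> 32);
}

int main() {
  // <1.0f, 1.0f>: both halves are 0x3F800000, so folding the literal is safe.
  printf("%d\n", isFoldablePKF32Imm(0x3F8000003F800000ULL)); // prints 1
  // <1.0f, 2.0f>: halves differ (0x3F800000 vs 0x40000000); the hardware would
  // replicate the low 1.0f into both channels, so the fold is rejected.
  printf("%d\n", isFoldablePKF32Imm(0x400000003F800000ULL)); // prints 0
  return 0;
}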