Skip to content

Commit b4aa3d3

Browse files
authored
[NFC] Check operand type instead of opcode (#168641)
A follow-up to #168458.
1 parent a3ab110 commit b4aa3d3

File tree

2 files changed

+14
-22
lines changed

2 files changed

+14
-22
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1423,7 +1423,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
14231423
/// \returns true if the target has packed f32 instructions that only read 32
14241424
/// bits from a scalar operand (SGPR or literal) and replicate the bits to
14251425
/// both channels.
1426-
bool hasPKF32InstsReplicatingLow32BitsOfScalarInput() const {
1426+
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const {
14271427
return getGeneration() == GFX12 && GFX1250Insts;
14281428
}
14291429

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -766,29 +766,21 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
766766
FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
767767
}
768768

769-
// Returns true if the instruction is a packed f32 instruction that only reads
770-
// 32 bits from a scalar operand (SGPR or literal) and replicates the bits to
771-
// both channels.
772-
static bool
773-
isPKF32InstrReplicatingLow32BitsOfScalarInput(const GCNSubtarget *ST,
774-
MachineInstr *MI) {
775-
if (!ST->hasPKF32InstsReplicatingLow32BitsOfScalarInput())
769+
// Returns true if the instruction is a packed F32 instruction and the
770+
// corresponding scalar operand reads 32 bits and replicates the bits to both
771+
// channels.
772+
static bool isPKF32InstrReplicatesLower32BitsOfScalarOperand(
773+
const GCNSubtarget *ST, MachineInstr *MI, unsigned OpNo) {
774+
if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput())
776775
return false;
777-
switch (MI->getOpcode()) {
778-
case AMDGPU::V_PK_ADD_F32:
779-
case AMDGPU::V_PK_MUL_F32:
780-
case AMDGPU::V_PK_FMA_F32:
781-
return true;
782-
default:
783-
return false;
784-
}
785-
llvm_unreachable("unknown instruction");
776+
const MCOperandInfo &OpDesc = MI->getDesc().operands()[OpNo];
777+
return OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
786778
}
787779

788780
// Packed FP32 instructions only read 32 bits from a scalar operand (SGPR or
789781
// literal) and replicate the bits to both channels. Therefore, if the hi and
790782
// lo are not the same, we can't fold it.
791-
static bool checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(
783+
static bool checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(
792784
const FoldableDef &OpToFold) {
793785
assert(OpToFold.isImm() && "Expected immediate operand");
794786
uint64_t ImmVal = OpToFold.getEffectiveImmVal().value();
@@ -953,8 +945,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
953945
// Special case for PK_F32 instructions if we are trying to fold an imm to
954946
// src0 or src1.
955947
if (OpToFold.isImm() &&
956-
isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, MI) &&
957-
!checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold))
948+
isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, MI, OpNo) &&
949+
!checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold))
958950
return false;
959951

960952
appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
@@ -1172,8 +1164,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
11721164
return false;
11731165

11741166
if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
1175-
if (isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, UseMI) &&
1176-
!checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold))
1167+
if (isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, UseMI, UseOpIdx) &&
1168+
!checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold))
11771169
return false;
11781170
appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
11791171
return true;

0 commit comments

Comments
 (0)