Skip to content

Conversation

@shiltian
Copy link
Contributor

A follow-up to #168458.

Copy link
Contributor Author

This stack of pull requests is managed by Graphite. Learn more about stacking.

@shiltian shiltian requested review from arsenm and rampitec November 19, 2025 00:41
@llvmbot
Copy link
Member

llvmbot commented Nov 19, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

Changes

A follow-up to #168458.


Full diff: https://github.com/llvm/llvm-project/pull/168641.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+1-1)
  • (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+13-21)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ca98b80787fb4..a87f9f274a4d3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1423,7 +1423,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// \returns true if the target has packed f32 instructions that only read 32
   /// bits from a scalar operand (SGPR or literal) and replicates the bits to
   /// both channels.
-  bool hasPKF32InstsReplicatingLow32BitsOfScalarInput() const {
+  bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const {
     return getGeneration() == GFX12 && GFX1250Insts;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 289bf1a563ffc..2df9267cde1f2 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -766,29 +766,21 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                       FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
 }
 
-// Returns true if the instruction is a packed f32 instruction that only reads
-// 32 bits from a scalar operand (SGPR or literal) and replicates the bits to
-// both channels.
-static bool
-isPKF32InstrReplicatingLow32BitsOfScalarInput(const GCNSubtarget *ST,
-                                              MachineInstr *MI) {
-  if (!ST->hasPKF32InstsReplicatingLow32BitsOfScalarInput())
+// Returns true if the instruction is a packed F32 instruction and the
+// corresponding scalar operand reads 32 bits and replicates the bits to both
+// channels.
+static bool isPKF32InstrReplicatesLower32BitsOfScalarOperand(
+    const GCNSubtarget *ST, MachineInstr *MI, unsigned OpNo) {
+  if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput())
     return false;
-  switch (MI->getOpcode()) {
-  case AMDGPU::V_PK_ADD_F32:
-  case AMDGPU::V_PK_MUL_F32:
-  case AMDGPU::V_PK_FMA_F32:
-    return true;
-  default:
-    return false;
-  }
-  llvm_unreachable("unknown instruction");
+  const MCOperandInfo &OpDesc = MI->getDesc().operands()[OpNo];
+  return OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
 }
 
 // Packed FP32 instructions only read 32 bits from a scalar operand (SGPR or
 // literal) and replicates the bits to both channels. Therefore, if the hi and
 // lo are not same, we can't fold it.
-static bool checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(
+static bool checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(
     const FoldableDef &OpToFold) {
   assert(OpToFold.isImm() && "Expected immediate operand");
   uint64_t ImmVal = OpToFold.getEffectiveImmVal().value();
@@ -953,8 +945,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
   // Special case for PK_F32 instructions if we are trying to fold an imm to
   // src0 or src1.
   if (OpToFold.isImm() &&
-      isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, MI) &&
-      !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold))
+      isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, MI, OpNo) &&
+      !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold))
     return false;
 
   appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
@@ -1172,8 +1164,8 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
     return false;
 
   if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
-    if (isPKF32InstrReplicatingLow32BitsOfScalarInput(ST, UseMI) &&
-        !checkImmOpForPKF32InstrReplicatingLow32BitsOfScalarInput(OpToFold))
+    if (isPKF32InstrReplicatesLower32BitsOfScalarOperand(ST, UseMI, UseOpIdx) &&
+        !checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(OpToFold))
       return false;
     appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
     return true;

@github-actions
Copy link

🐧 Linux x64 Test Results

  • 186344 tests passed
  • 4859 tests skipped

@shiltian shiltian merged commit b4aa3d3 into main Nov 19, 2025
10 of 11 checks passed
@shiltian shiltian deleted the users/shiltian/check-op-type-instead-of-opcode branch November 19, 2025 02:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants