Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
f0877bb
add commute for some VOP3 inst, allow commute for both inline constan…
Shoreshen Dec 30, 2024
ae5f1e8
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Dec 31, 2024
df6903c
Merge remote-tracking branch 'origin/main' into Add-isCommutable-attr…
Shoreshen Dec 31, 2024
ada83d6
add inline constant case & merge main
Shoreshen Dec 31, 2024
c599af0
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 2, 2025
3a83ae5
fix lit change
Shoreshen Jan 2, 2025
5c9d065
fix lit change
Shoreshen Jan 2, 2025
d3f00dc
Update llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Shoreshen Jan 2, 2025
0e60298
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 2, 2025
707474f
fix comments
Shoreshen Jan 2, 2025
79219d1
fix
Shoreshen Jan 2, 2025
288ead2
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 4, 2025
4becc58
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 5, 2025
53b370a
Add legal check for swap
Shoreshen Jan 6, 2025
0b746a8
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 6, 2025
785921f
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 7, 2025
6955a86
Merge remote-tracking branch 'origin/main' into Add-isCommutable-attr…
Shoreshen Jan 8, 2025
5e15e72
add tests & merge_main
Shoreshen Jan 8, 2025
004a82d
fix lit.cfg.py
Shoreshen Jan 8, 2025
dc2739f
fix lit.cfg.py
Shoreshen Jan 8, 2025
378c02c
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 8, 2025
49ae569
Merge remote-tracking branch 'origin/main' into Add-isCommutable-attr…
Shoreshen Jan 9, 2025
161a2b9
adjust comment & merge main
Shoreshen Jan 9, 2025
4d569ae
Merge remote-tracking branch 'origin/main' into Add-isCommutable-attr…
Shoreshen Jan 10, 2025
1689c1e
adjust case & merge main
Shoreshen Jan 10, 2025
328e566
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 13, 2025
9faf423
fix inconsistent capitalization
Shoreshen Jan 13, 2025
19b8ad4
fix test case
Shoreshen Jan 13, 2025
cc3a125
Merge remote-tracking branch 'origin/main' into Add-isCommutable-attr…
Shoreshen Jan 20, 2025
0a89dc9
merge main
Shoreshen Jan 20, 2025
d8e6cb7
fix format
Shoreshen Jan 20, 2025
3c7bd89
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 21, 2025
bf1da57
remove special handling for VOPD, since no commutable VOPD instruction
Shoreshen Jan 21, 2025
789092d
Merge branch 'llvm:main' into Add-isCommutable-attribute-to-VOP3-inst…
Shoreshen Jan 21, 2025
6c35280
Merge branch 'main' into Add-isCommutable-attribute-to-VOP3-instructions
Shoreshen Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2752,8 +2752,8 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
static MachineInstr *swapInlineConstOperands(MachineInstr &MI,
MachineOperand &NonRegOp1,
MachineOperand &NonRegOp2) {
auto TargetFlags = NonRegOp1.getTargetFlags();
auto NonRegVal = NonRegOp1.getImm();
unsigned TargetFlags = NonRegOp1.getTargetFlags();
int64_t NonRegVal = NonRegOp1.getImm();

NonRegOp1.setImm(NonRegOp2.getImm());
NonRegOp2.setImm(NonRegVal);
Expand Down Expand Up @@ -2798,7 +2798,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
} else if (!Src0.isReg() && Src1.isReg()) {
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else if (isInlineConstant(Src0) && isInlineConstant(Src1)) {
} else if (isInlineConstant(Src1)) {
// If Src1 is inline constant and Src0 is not, then isOperandLegal rejects
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't explaining why to do this, but this is also an API flaw that's always been there. We need an isOperandLegal that doesn't account for the context of the other operands

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or rather, one that takes the full set of operands that need to be considered for the result instruction

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @arsenm

This isn't explaining why to do this, but this is also an API flaw that's always been there. We need an isOperandLegal that doesn't account for the context of the other operands

isOperandLegal also check the literal and constant bus (literal or SGPR) limit, I think maybe it is better to separate them from the function. It will also cause some in-consistency with ISA (e.g. reject instructions with 1 literal and 1 imm) if the OpIdx and MO's index are different

Or rather, one that takes the full set of operands that need to be considered for the result instruction

We can do that, but to create the result instruction, we actually swapped the operands on the original instruction too. So if I do that and failed the isOperandLegal check, I would need to swap it back

Copy link
Contributor Author

@Shoreshen Shoreshen Jan 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or rather, one that takes the full set of operands that need to be considered for the result instruction

Hi @arsenm , I tried to check the swapped instruction with isOperandLegal, it fails 100+ cases. The following is the change:

MachineInstr* SIInstrInfo::swapOperands(MachineInstr &MI, bool NewMI, 
                                        unsigned Src0Idx,
                                        unsigned Src1Idx,
                                        MachineOperand &Src0,
                                        MachineOperand &Src1) const {
  MachineInstr *CommutedMI = nullptr;

  if (Src0.isReg() && Src1.isReg()) {
    // Be sure to copy the source modifiers to the right place.
    CommutedMI
        = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
  } else if (Src0.isReg() && !Src1.isReg()) {
    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
  } else if (!Src0.isReg() && Src1.isReg()) {
    CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
  } else if (Src0.isImm() && Src1.isImm()) {
    CommutedMI = swapImmOperands(MI, Src0, Src1);
  }

  return CommutedMI;
}

MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                  unsigned Src0Idx,
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  int CommutedOpcode = commuteOpcode(Opc);
  if (CommutedOpcode == -1)
    return nullptr;

  if (Src0Idx > Src1Idx)
    std::swap(Src0Idx, Src1Idx);

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
           static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
           static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  MachineOperand &Src1 = MI.getOperand(Src1Idx);
  MachineInstr *CommutedMI = swapOperands(MI, NewMI, Src0Idx, Src1Idx, Src0, Src1);
  if (!CommutedMI)
    return nullptr;
  if (!isOperandLegal(*CommutedMI, Src1Idx, &CommutedMI->getOperand(Src1Idx))) {
    // swap back if failed check
    swapOperands(MI, NewMI, Src0Idx, Src1Idx, Src0, Src1);
    return nullptr;
  }

  if (CommutedMI) {
    swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
                        Src1, AMDGPU::OpName::src1_modifiers);

    swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_sel, Src1,
                        AMDGPU::OpName::src1_sel);

    CommutedMI->setDesc(get(CommutedOpcode));
  }

  return CommutedMI;
}

The reason for that is the VALU instructions with literal in the first input operands.

The commuteInstruction function was called during the shrink instruction pass, and mismatched OpIdx and MO (first operand) will be the parameter of isOperandLegal. It return false because MO was counted for 2 times of literal limit (explained in the other reply).

Now if using the swapped instruction, the mismatch no longer exists, then it will move all literal constant to the second input operand for all VALU instructions.

Shall I fix all the cases in this PR?? or fix them in a new issue??

if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapInlineConstOperands(MI, Src0, Src1);
} else {
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,9 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
let FPDPRounding = 1 in {
let Predicates = [Has16BitInsts, isGFX8Only] in {
defm V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
let isCommutable = 1 in
defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
let isCommutable = 1 in {
defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
} // End isCommutable = 1
} // End Predicates = [Has16BitInsts, isGFX8Only]

let SubtargetPredicate = isGFX9Plus in {
Expand Down Expand Up @@ -1257,8 +1258,9 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}
let isCommutable = 1 in
defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
let isCommutable = 1 in {
defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
} // End isCommutable = 1
defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;

} // End SubtargetPredicate = isGFX10Plus
Expand Down