-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Add commute for some VOP3 inst #121326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 32 commits
f0877bb
ae5f1e8
df6903c
ada83d6
c599af0
3a83ae5
5c9d065
d3f00dc
0e60298
707474f
79219d1
288ead2
4becc58
53b370a
0b746a8
785921f
6955a86
5e15e72
004a82d
dc2739f
378c02c
49ae569
161a2b9
4d569ae
1689c1e
328e566
9faf423
19b8ad4
cc3a125
0a89dc9
d8e6cb7
3c7bd89
bf1da57
789092d
6c35280
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2749,6 +2749,67 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, | |
| return &MI; | ||
| } | ||
|
|
||
| static MachineInstr *swapImmOperands(MachineInstr &MI, | ||
| MachineOperand &NonRegOp1, | ||
| MachineOperand &NonRegOp2) { | ||
| unsigned TargetFlags = NonRegOp1.getTargetFlags(); | ||
| int64_t NonRegVal = NonRegOp1.getImm(); | ||
|
|
||
| NonRegOp1.setImm(NonRegOp2.getImm()); | ||
| NonRegOp2.setImm(NonRegVal); | ||
| NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags()); | ||
| NonRegOp2.setTargetFlags(TargetFlags); | ||
| return &MI; | ||
| } | ||
|
|
||
| bool SIInstrInfo::isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0, | ||
| const MachineOperand *MO0, unsigned OpIdx1, | ||
| const MachineOperand *MO1) const { | ||
| const MCInstrDesc &InstDesc = MI.getDesc(); | ||
| const MCOperandInfo &OpInfo0 = InstDesc.operands()[OpIdx0]; | ||
| const MCOperandInfo &OpInfo1 = InstDesc.operands()[OpIdx1]; | ||
| const TargetRegisterClass *DefinedRC1 = | ||
| OpInfo1.RegClass != -1 ? RI.getRegClass(OpInfo1.RegClass) : nullptr; | ||
| const TargetRegisterClass *DefinedRC0 = | ||
| OpInfo1.RegClass != -1 ? RI.getRegClass(OpInfo0.RegClass) : nullptr; | ||
|
|
||
| unsigned Opc = MI.getOpcode(); | ||
| int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); | ||
| if (Src0Idx == -1) { | ||
| // VOPD V_DUAL_* instructions use different operand names. | ||
| Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0X); | ||
| } | ||
|
|
||
| // Swap doesn't breach constant bus or literal limits | ||
| // It may move literal to position other than src0, this is not allowed | ||
| // pre-gfx10 However, most test cases need literals in Src0 for VOP | ||
| // FIXME: After gfx9, literal can be in place other than Src0 | ||
| if (isVALU(MI)) { | ||
| if ((int)OpIdx0 == Src0Idx && !MO0->isReg() && | ||
| !isInlineConstant(*MO0, OpInfo1)) | ||
| return false; | ||
| if ((int)OpIdx1 == Src0Idx && !MO1->isReg() && | ||
| !isInlineConstant(*MO1, OpInfo0)) | ||
| return false; | ||
| } | ||
|
|
||
| if (OpIdx1 != Src0Idx && MO0->isReg()) { | ||
| if (!DefinedRC1) | ||
| return OpInfo1.OperandType == MCOI::OPERAND_UNKNOWN; | ||
| return isLegalRegOperand(MI, OpIdx1, *MO0); | ||
| } | ||
| if (OpIdx0 != Src0Idx && MO1->isReg()) { | ||
| if (!DefinedRC0) | ||
| return OpInfo0.OperandType == MCOI::OPERAND_UNKNOWN; | ||
| return isLegalRegOperand(MI, OpIdx0, *MO1); | ||
| } | ||
|
|
||
| // No need to check 64-bit literals since swapping does not bring new | ||
| // 64-bit literals into current instruction to fold to 32-bit | ||
|
|
||
| return isImmOperandLegal(MI, OpIdx1, *MO0); | ||
| } | ||
|
|
||
| MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, | ||
| unsigned Src0Idx, | ||
| unsigned Src1Idx) const { | ||
|
|
@@ -2770,21 +2831,20 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, | |
|
|
||
| MachineOperand &Src0 = MI.getOperand(Src0Idx); | ||
| MachineOperand &Src1 = MI.getOperand(Src1Idx); | ||
|
|
||
| if (!isLegalToSwap(MI, Src0Idx, &Src0, Src1Idx, &Src1)) { | ||
| return nullptr; | ||
| } | ||
| MachineInstr *CommutedMI = nullptr; | ||
| if (Src0.isReg() && Src1.isReg()) { | ||
| if (isOperandLegal(MI, Src1Idx, &Src0)) { | ||
| // Be sure to copy the source modifiers to the right place. | ||
| CommutedMI | ||
| = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); | ||
| } | ||
|
|
||
| // Be sure to copy the source modifiers to the right place. | ||
| CommutedMI = | ||
| TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx); | ||
| } else if (Src0.isReg() && !Src1.isReg()) { | ||
| if (isOperandLegal(MI, Src1Idx, &Src0)) | ||
| CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); | ||
| CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1); | ||
| } else if (!Src0.isReg() && Src1.isReg()) { | ||
| if (isOperandLegal(MI, Src1Idx, &Src0)) | ||
| CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); | ||
| CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); | ||
| } else if (Src0.isImm() && Src1.isImm()) { | ||
| CommutedMI = swapImmOperands(MI, Src0, Src1); | ||
| } else { | ||
| // FIXME: Found two non registers to commute. This does happen. | ||
| return nullptr; | ||
|
|
@@ -5817,6 +5877,49 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, | |
| return RC->hasSuperClassEq(DRC); | ||
| } | ||
|
|
||
| bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx, | ||
| const MachineOperand &MO) const { | ||
| const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); | ||
| const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx]; | ||
| unsigned Opc = MI.getOpcode(); | ||
|
|
||
| if (!isLegalRegOperand(MRI, OpInfo, MO)) | ||
| return false; | ||
|
|
||
| // check Accumulate GPR operand | ||
| bool IsAGPR = RI.isAGPR(MRI, MO.getReg()); | ||
| if (IsAGPR && !ST.hasMAIInsts()) | ||
| return false; | ||
| if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This !MRI.reservedRegsFrozen() doesn't make any sense but all of this code looks copied directly from existing code
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, I just wrap it from the isoperandlegal function to avoid duplicate code. Should I try to remove it??
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not here |
||
| (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc))) | ||
| return false; | ||
| // Atomics should have both vdst and vdata either vgpr or agpr. | ||
| const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); | ||
| const int DataIdx = AMDGPU::getNamedOperandIdx( | ||
| Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); | ||
| if ((int)OpIdx == VDstIdx && DataIdx != -1 && | ||
| MI.getOperand(DataIdx).isReg() && | ||
| RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR) | ||
| return false; | ||
| if ((int)OpIdx == DataIdx) { | ||
| if (VDstIdx != -1 && | ||
| RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR) | ||
| return false; | ||
| // DS instructions with 2 src operands also must have tied RC. | ||
| const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1); | ||
| if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() && | ||
| RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) | ||
| return false; | ||
| } | ||
|
|
||
| // Check V_ACCVGPR_WRITE_B32_e64 | ||
| if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() && | ||
| (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && | ||
| RI.isSGPRReg(MRI, MO.getReg())) | ||
| return false; | ||
| return true; | ||
| } | ||
|
|
||
| bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, | ||
| const MCOperandInfo &OpInfo, | ||
| const MachineOperand &MO) const { | ||
|
|
@@ -5879,40 +5982,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, | |
| if (MO->isReg()) { | ||
| if (!DefinedRC) | ||
| return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN; | ||
| if (!isLegalRegOperand(MRI, OpInfo, *MO)) | ||
| return false; | ||
| bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); | ||
| if (IsAGPR && !ST.hasMAIInsts()) | ||
| return false; | ||
| unsigned Opc = MI.getOpcode(); | ||
| if (IsAGPR && | ||
| (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) && | ||
| (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc))) | ||
| return false; | ||
| // Atomics should have both vdst and vdata either vgpr or agpr. | ||
| const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); | ||
| const int DataIdx = AMDGPU::getNamedOperandIdx(Opc, | ||
| isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); | ||
| if ((int)OpIdx == VDstIdx && DataIdx != -1 && | ||
| MI.getOperand(DataIdx).isReg() && | ||
| RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR) | ||
| return false; | ||
| if ((int)OpIdx == DataIdx) { | ||
| if (VDstIdx != -1 && | ||
| RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR) | ||
| return false; | ||
| // DS instructions with 2 src operands also must have tied RC. | ||
| const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, | ||
| AMDGPU::OpName::data1); | ||
| if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() && | ||
| RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR) | ||
| return false; | ||
| } | ||
| if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() && | ||
| (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) && | ||
| RI.isSGPRReg(MRI, MO->getReg())) | ||
| return false; | ||
| return true; | ||
| return isLegalRegOperand(MI, OpIdx, *MO); | ||
| } | ||
|
|
||
| if (MO->isImm()) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,6 @@ name: not_shrink_icmp | |
| body: | | ||
| bb.0: | ||
| ; GCN-LABEL: name: not_shrink_icmp | ||
| ; GCN: S_CMP_GT_I32 1, 65, implicit-def $scc | ||
| ; GCN: S_CMP_LT_I32 65, 1, implicit-def $scc | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pointless swap but probably not this patch's problem |
||
| S_CMP_GT_I32 1, 65, implicit-def $scc | ||
| ... | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This VOPD case is still missing a test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @arsenm , I re-searched all generated instruction, but cannot find a VOPD instruction that is commutable with operand names as $src0X...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then remove this handling?