Skip to content

Commit 8d0df57

Browse files
fhossein-quickaushik-quicincSantanu Das
authored
[Hexagon] Improve QFP Optimizer (#166647)
This patch enhances HexagonQFPOptimizer in multiple ways: 1. Refactor the code for better readability and maintainability. 2. Optimize vabs, vneg and vilog2 converts. The three instructions mentioned can be optimized as shown below: ```v1.sf = v0.qf32 v2.qf = vneg v1.sf``` to ```v2.qf = vneg v0.qf32``` This optimization eliminates one conversion and is applicable to both qf32 and qf16 types. 3. Enable vsub fusion with mixed arguments. Previously, QFPOptimizer did not fuse partial qfloat operands with vsub. This update allows selective use of vsub_hf_mix, vsub_sf_mix, vsub_qf16_mix, and vsub_qf32_mix when appropriate. It also enables QFP simplifications involving vector pair subregisters. Example scenario in a machine basic block targeting Hexagon: ```v1.qf32 = ... // result of a vadd v2.sf = v1.qf32 v3.qf32 = vmpy(v2.sf, v2.sf)``` 4. Remove redundant conversions. Under certain conditions, we previously bailed out before removing qf-to-sf/hf conversions. This patch removes that bailout, enabling more aggressive elimination of unnecessary conversions. 5. Don't optimize converts feeding into multiply: Removing converts feeding into multiply loses precision. This patch avoids optimizing multiplies by default, while giving users the option to enable it with a flag. Patch By: Fateme Hosseini Co-authored-by: Kaushik Kulkarni <[email protected]> Co-authored-by: Santanu Das <[email protected]>
1 parent 316236b commit 8d0df57

File tree

8 files changed

+860
-34
lines changed

8 files changed

+860
-34
lines changed

llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4753,6 +4753,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const {
47534753
return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0);
47544754
}
47554755

4756+
bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const {
4757+
return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf ||
4758+
MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
4759+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf ||
4760+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
4761+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf ||
4762+
MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
4763+
MI->getOpcode() == Hexagon::V6_vmpy_qf16 ||
4764+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
4765+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 ||
4766+
MI->getOpcode() == Hexagon::V6_vmpy_qf32);
4767+
}
4768+
47564769
// Addressing mode relations.
47574770
short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const {
47584771
return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc;

llvm/lib/Target/Hexagon/HexagonInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
532532
}
533533

534534
MCInst getNop() const override;
535+
/// Return true if \p MI is a qfloat (QFP) multiply instruction.
bool isQFPMul(const MachineInstr *MI) const;
535536
};
536537

537538
/// \brief Create RegSubRegPair from a register MachineOperand

llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp

Lines changed: 114 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
// are PHI inst.
5959
//
6060
//===----------------------------------------------------------------------===//
61-
#include <unordered_set>
61+
6262
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
6363

6464
#include "Hexagon.h"
@@ -86,6 +86,9 @@ using namespace llvm;
8686
cl::opt<bool>
8787
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
8888
cl::desc("Disable optimization of Qfloat operations."));
89+
// Command-line switch "disable-qfp-opt-mul". It is initialized to true, so
// instructions for which isQFPMul() returns true are skipped by the pass
// unless the option is explicitly set to false.
cl::opt<bool> DisableQFOptForMul(
90+
"disable-qfp-opt-mul", cl::init(true),
91+
cl::desc("Disable optimization of Qfloat operations for multiply."));
8992

9093
namespace {
9194
const std::map<unsigned short, unsigned short> QFPInstMap{
@@ -101,11 +104,21 @@ const std::map<unsigned short, unsigned short> QFPInstMap{
101104
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
102105
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
103106
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
104-
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
107+
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
108+
{Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
109+
{Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
110+
{Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
111+
{Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
112+
{Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
113+
{Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
105114
} // namespace
106115

107-
namespace {
116+
namespace llvm {
117+
FunctionPass *createHexagonQFPOptimizer();
118+
void initializeHexagonQFPOptimizerPass(PassRegistry &);
119+
} // namespace llvm
108120

121+
namespace {
109122
struct HexagonQFPOptimizer : public MachineFunctionPass {
110123
public:
111124
static char ID;
@@ -116,6 +129,10 @@ struct HexagonQFPOptimizer : public MachineFunctionPass {
116129

117130
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
118131

132+
bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB);
133+
134+
bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB);
135+
119136
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
120137

121138
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -142,19 +159,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() {
142159
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
143160
MachineBasicBlock *MBB) {
144161

145-
// Early exit:
146-
// - if instruction is invalid or has too few operands (QFP ops need 2 sources
147-
// + 1 dest),
148-
// - or does not have a transformation mapping.
149-
if (MI->getNumOperands() < 3)
162+
if (MI->getNumOperands() == 2)
163+
return optimizeQfpOneOp(MI, MBB);
164+
else if (MI->getNumOperands() == 3)
165+
return optimizeQfpTwoOp(MI, MBB);
166+
else
150167
return false;
168+
}
169+
170+
/// Try to fold a qf-to-sf/hf conversion into the single-source instruction
/// \p MI that consumes its result.
///
/// If the source operand of \p MI is defined by V6_vconv_sf_qf32 or
/// V6_vconv_hf_qf16, a new instruction with the opcode that QFPInstMap maps
/// MI's opcode to is inserted in \p MBB before \p MI. It writes MI's result
/// register and reads the conversion's source register directly.
///
/// \param MI  Instruction with one result and one source operand.
/// \param MBB Basic block that contains \p MI.
/// \returns true if a replacement instruction was inserted (the caller in
///          runOnMachineFunction then erases \p MI), false if nothing changed.
bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI,
                                           MachineBasicBlock *MBB) {
  auto It = QFPInstMap.find(MI->getOpcode());
  if (It == QFPInstMap.end())
    return false;
  unsigned short InstTy = It->second;

  MachineOperand &Res = MI->getOperand(0);
  MachineOperand &Src = MI->getOperand(1);
  // Only register operands can be rewritten; getVRegDef is only meaningful
  // for virtual registers.
  if (!Res.isReg() || !Src.isReg() || !Src.getReg().isVirtual())
    return false;

  // Get the reaching def of MI's source operand. getVRegDef returns nullptr
  // when the register has no definition, so bail out instead of
  // dereferencing it.
  MachineInstr *DefMI = MRI->getVRegDef(Src.getReg());
  if (!DefMI)
    return false;

  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump());

  // Nothing to do unless the reaching def is a qf -> sf/hf conversion.
  unsigned ReachDefOp = DefMI->getOpcode();
  if (ReachDefOp != Hexagon::V6_vconv_sf_qf32 &&
      ReachDefOp != Hexagon::V6_vconv_hf_qf16)
    return false;

  // Get the reaching def of the reaching def to check for W reg def.
  MachineInstr *ReachDefDef = nullptr;
  if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() &&
      DefMI->getOperand(1).getReg().isVirtual())
    ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg());

  // Return if the reaching def of reaching def is W type.
  if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) ==
                         &Hexagon::HvxWRRegClass)
    return false;

  // Carry the KILL status of the conversion's source over to the new
  // instruction, and clear it on the conversion, which is left in place and
  // therefore no longer the last reader of that register.
  MachineOperand &SrcOp = DefMI->getOperand(1);
  unsigned Op0F = getKillRegState(SrcOp.isKill());
  SrcOp.setIsKill(false);

  MachineInstrBuilder MIB =
      BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
          .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg());
  LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
  return true;
}
215+
216+
bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI,
217+
MachineBasicBlock *MBB) {
155218

156219
unsigned Op0F = 0;
157220
unsigned Op1F = 0;
221+
auto It = QFPInstMap.find(MI->getOpcode());
222+
if (It == QFPInstMap.end())
223+
return false;
224+
unsigned short InstTy = It->second;
158225
// Get the reaching defs of MI, DefMI1 and DefMI2
159226
MachineInstr *DefMI1 = nullptr;
160227
MachineInstr *DefMI2 = nullptr;
@@ -167,6 +234,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
167234
return false;
168235

169236
MachineOperand &Res = MI->getOperand(0);
237+
if (!Res.isReg())
238+
return false;
239+
170240
MachineInstr *Inst1 = nullptr;
171241
MachineInstr *Inst2 = nullptr;
172242
LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
@@ -185,7 +255,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
185255
unsigned Def2OP = DefMI2->getOpcode();
186256

187257
MachineInstrBuilder MIB;
188-
// Case 1: Both reaching defs of MI are qf to sf/hf conversions
258+
259+
// Check if both the reaching defs of MI are qf to sf/hf conversions
189260
if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
190261
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
191262
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -226,7 +297,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
226297
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
227298
return true;
228299

229-
// Case 2: Left operand is conversion to sf/hf
300+
// Check if left operand's reaching def is a conversion to sf/hf
230301
} else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
231302
Def2OP != Hexagon::V6_vconv_sf_qf32) ||
232303
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -250,21 +321,14 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
250321
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
251322
return true;
252323

253-
// Case 2: Left operand is conversion to sf/hf
324+
// Check if right operand's reaching def is a conversion to sf/hf
254325
} else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
255326
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
256327
(Def1OP != Hexagon::V6_vconv_hf_qf16 &&
257328
Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
258329
!DefMI1->isPHI() &&
259330
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
260331
// The second operand of original instruction is converted.
261-
// In "mix" instructions, "qf" operand is always the first operand.
262-
263-
// Caveat: vsub is not commutative w.r.t operands.
264-
if (InstTy == Hexagon::V6_vsub_qf16_mix ||
265-
InstTy == Hexagon::V6_vsub_qf32_mix)
266-
return false;
267-
268332
if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
269333
&Hexagon::HvxWRRegClass)
270334
return false;
@@ -275,10 +339,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
275339
Op1F = getKillRegState(Src2.isKill());
276340
Src2.setIsKill(false);
277341
Op0F = getKillRegState(Src1.isKill());
278-
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
279-
.addReg(Src2.getReg(), Op1F,
280-
Src2.getSubReg()) // Notice the operands are flipped.
281-
.addReg(Src1.getReg(), Op0F, Src1.getSubReg());
342+
if (InstTy == Hexagon::V6_vsub_qf16_mix ||
343+
InstTy == Hexagon::V6_vsub_qf32_mix) {
344+
if (!HST->useHVXV81Ops())
345+
// vsub_(hf|sf)_mix insts are only available on HVX v81+
346+
return false;
347+
// vsub is not commutative w.r.t. operands -> treat it as a special case
348+
// to choose the correct mix instruction.
349+
if (Def2OP == Hexagon::V6_vconv_sf_qf32)
350+
InstTy = Hexagon::V6_vsub_sf_mix;
351+
else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
352+
InstTy = Hexagon::V6_vsub_hf_mix;
353+
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
354+
.addReg(Src1.getReg(), Op0F, Src1.getSubReg())
355+
.addReg(Src2.getReg(), Op1F, Src2.getSubReg());
356+
} else {
357+
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
358+
.addReg(Src2.getReg(), Op1F,
359+
Src2.getSubReg()) // Notice the operands are flipped.
360+
.addReg(Src1.getReg(), Op0F, Src1.getSubReg());
361+
}
282362
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
283363
return true;
284364
}
@@ -309,15 +389,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
309389
while (MII != MBBI->instr_end()) {
310390
MachineInstr *MI = &*MII;
311391
++MII; // As MI might be removed.
312-
313-
if (QFPInstMap.count(MI->getOpcode()) &&
314-
MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
315-
MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
316-
LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
317-
if (optimizeQfp(MI, MBB)) {
318-
MI->eraseFromParent();
319-
LLVM_DEBUG(dbgs() << "\t....Removing....");
320-
Changed = true;
392+
if (QFPInstMap.count(MI->getOpcode())) {
393+
auto OpC = MI->getOpcode();
394+
if (DisableQFOptForMul && HII->isQFPMul(MI))
395+
continue;
396+
if (OpC != Hexagon::V6_vconv_sf_qf32 &&
397+
OpC != Hexagon::V6_vconv_hf_qf16) {
398+
LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
399+
if (optimizeQfp(MI, MBB)) {
400+
MI->eraseFromParent();
401+
LLVM_DEBUG(dbgs() << "\t....Removing....");
402+
Changed = true;
403+
}
321404
}
322405
}
323406
}

0 commit comments

Comments
 (0)