Skip to content

Commit 42caf09

Browse files
fhossein-quickaushik-quicincSantanu Das
committed
[Hexagon] Improve QFP Optimizer
This patch enhances HexagonQFPOptimizer in multiple ways: 1. Refactor the code for better readability and maintainability. 2. Optimize vabs, vneg and vilog2 converts. The three instructions mentioned can be optimized as shown below: ```v1.sf = v0.qf32 v2.qf = vneg v1.sf``` to ```v2.qf = vneg v0.qf32``` This optimization eliminates one conversion and is applicable to both qf32 and qf16 types. 3. Enable vsub fusion with mixed arguments. Previously, QFPOptimizer did not fuse partial qfloat operands with vsub. This update allows selective use of vsub_hf_mix, vsub_sf_mix, vsub_qf16_mix, and vsub_qf32_mix when appropriate. It also enables QFP simplifications involving vector pair subregisters. Example scenario in a machine basic block targeting Hexagon: ```v1.qf32 = ... // result of a vadd v2.sf = v1.qf32 v3.qf32 = vmpy(v2.sf, v2.sf)``` 4. Remove redundant conversions. Under certain conditions, we previously bailed out before removing qf-to-sf/hf conversions. This patch removes that bailout, enabling more aggressive elimination of unnecessary conversions. 5. Don't optimize converts feeding into multiply: Removing converts feeding into multiply loses precision. This patch avoids optimizing multiplies by default, while giving users the option to enable it with a flag (`-disable-qfp-opt-mul=false`). Patch By: Fateme Hosseini Co-authored-by: Kaushik Kulkarni <[email protected]> Co-authored-by: Santanu Das <[email protected]>
1 parent 5419097 commit 42caf09

File tree

8 files changed

+849
-68
lines changed

8 files changed

+849
-68
lines changed

llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4753,6 +4753,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const {
47534753
return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0);
47544754
}
47554755

4756+
bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const {
4757+
return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf ||
4758+
MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
4759+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf ||
4760+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
4761+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf ||
4762+
MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
4763+
MI->getOpcode() == Hexagon::V6_vmpy_qf16 ||
4764+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
4765+
MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 ||
4766+
MI->getOpcode() == Hexagon::V6_vmpy_qf32);
4767+
}
4768+
47564769
// Addressing mode relations.
47574770
short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const {
47584771
return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc;

llvm/lib/Target/Hexagon/HexagonInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
532532
}
533533

534534
MCInst getNop() const override;
535+
/// Returns true if \p MI is a qfloat multiply instruction (one of the
/// V6_vmpy_qf16* / V6_vmpy_qf32* opcodes).
bool isQFPMul(const MachineInstr *MI) const;
535536
};
536537

537538
/// \brief Create RegSubRegPair from a register MachineOperand

llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp

Lines changed: 103 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
// are PHI inst.
5959
//
6060
//===----------------------------------------------------------------------===//
61-
#include <unordered_set>
61+
6262
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
6363

6464
#include "Hexagon.h"
@@ -86,6 +86,10 @@ using namespace llvm;
8686
cl::opt<bool>
8787
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
8888
cl::desc("Disable optimization of Qfloat operations."));
89+
// When set (the default), runOnMachineFunction skips every instruction for
// which HexagonInstrInfo::isQFPMul() returns true, so conversions feeding a
// qfloat multiply are left in place. Pass -disable-qfp-opt-mul=false to let
// the optimizer rewrite multiplies as well.
cl::opt<bool>
90+
DisableQFOptForMul("disable-qfp-opt-mul", cl::init(true),
91+
cl::desc(
92+
"Disable optimization of Qfloat operations for multiply."));
8993

9094
namespace {
9195
const std::map<unsigned short, unsigned short> QFPInstMap{
@@ -101,96 +105,135 @@ const std::map<unsigned short, unsigned short> QFPInstMap{
101105
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
102106
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
103107
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
104-
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
108+
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
109+
{Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
110+
{Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
111+
{Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
112+
{Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
113+
{Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
114+
{Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
105115
} // namespace
106116

107-
namespace {
117+
namespace llvm {
118+
FunctionPass *createHexagonQFPOptimizer();
119+
void initializeHexagonQFPOptimizerPass(PassRegistry &);
120+
} // namespace llvm
108121

122+
namespace {
109123
// Machine-function pass that rewrites instructions listed in QFPInstMap so
// that they consume qfloat operands directly instead of the result of a
// qf-to-sf/hf conversion.
struct HexagonQFPOptimizer : public MachineFunctionPass {
110124
public:
111125
static char ID;
112-
113126
HexagonQFPOptimizer() : MachineFunctionPass(ID) {}
114-
115127
// Pass entry point; initializes HST, HII and MRI before scanning MF.
bool runOnMachineFunction(MachineFunction &MF) override;
116-
117128
// Dispatches on MI's operand count: 2 -> optimizeQfpOneOp,
// 3 -> optimizeQfpTwoOp, anything else -> false.
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
118-
129+
// Handles instructions with a destination and two source operands.
bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB);
130+
// Handles instructions with a destination and one source operand.
bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB);
119131
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
120-
121132
// The pass only rewrites instructions in place, so the CFG is preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
122133
AU.setPreservesCFG();
123134
MachineFunctionPass::getAnalysisUsage(AU);
124135
}
125-
126136
private:
127137
// Cached per-function state, assigned in runOnMachineFunction.
const HexagonSubtarget *HST = nullptr;
128138
const HexagonInstrInfo *HII = nullptr;
129139
const MachineRegisterInfo *MRI = nullptr;
130140
};
131-
132141
char HexagonQFPOptimizer::ID = 0;
133142
} // namespace
134-
135143
INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
136144
HEXAGON_QFP_OPTIMIZER, false, false)
137-
138145
FunctionPass *llvm::createHexagonQFPOptimizer() {
139146
return new HexagonQFPOptimizer();
140147
}
141148

142149
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
143150
MachineBasicBlock *MBB) {
144-
145-
// Early exit:
146-
// - if instruction is invalid or has too few operands (QFP ops need 2 sources
147-
// + 1 dest),
148-
// - or does not have a transformation mapping.
149-
if (MI->getNumOperands() < 3)
151+
if (MI->getNumOperands() == 2)
152+
return optimizeQfpOneOp(MI, MBB);
153+
else if (MI->getNumOperands() == 3)
154+
return optimizeQfpTwoOp(MI, MBB);
155+
else
150156
return false;
157+
}
158+
159+
/// Try to fold a qf-to-sf/hf conversion into the single-source instruction
/// \p MI.
///
/// If the source operand of \p MI is defined by V6_vconv_sf_qf32 or
/// V6_vconv_hf_qf16, a new instruction with the opcode that QFPInstMap
/// associates with \p MI is inserted before \p MI. It writes the same result
/// register and reads the conversion's qfloat source directly. The kill flag
/// of the conversion's source operand is moved onto the new use.
///
/// Neither \p MI nor the conversion is erased here; the caller erases \p MI
/// when this function returns true.
///
/// \param MI  candidate instruction (destination plus one source operand).
/// \param MBB basic block containing \p MI; the new instruction is inserted
///            into it.
/// \returns true if a replacement instruction was inserted, false if \p MI
///          was left untouched.
bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI,
                                           MachineBasicBlock *MBB) {
  auto It = QFPInstMap.find(MI->getOpcode());
  if (It == QFPInstMap.end())
    return false;
  unsigned short InstTy = It->second;

  MachineOperand &Res = MI->getOperand(0);
  MachineOperand &Use = MI->getOperand(1);
  // Both the result and the source must be registers before getReg() is
  // queried on them.
  if (!Res.isReg() || !Use.isReg())
    return false;

  // Get the reaching def of MI's source operand. getVRegDef may return
  // nullptr, so bail out instead of dereferencing it (this mirrors the
  // two-operand path).
  MachineInstr *DefMI = MRI->getVRegDef(Use.getReg());
  if (!DefMI)
    return false;
  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump());

  // Only a qf-to-sf/hf conversion can be folded.
  unsigned ReachDefOp = DefMI->getOpcode();
  if (ReachDefOp != Hexagon::V6_vconv_sf_qf32 &&
      ReachDefOp != Hexagon::V6_vconv_hf_qf16)
    return false;

  if (DefMI->getNumOperands() < 2 || !DefMI->getOperand(1).isReg())
    return false;
  MachineOperand &SrcOp = DefMI->getOperand(1);

  // Get the reaching def of the reaching def and give up if it defines a
  // W (vector pair) register.
  if (SrcOp.getReg().isVirtual()) {
    MachineInstr *ReachDefDef = MRI->getVRegDef(SrcOp.getReg());
    if (ReachDefDef && ReachDefDef->getNumOperands() > 0 &&
        ReachDefDef->getOperand(0).isReg() &&
        MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) ==
            &Hexagon::HvxWRRegClass)
      return false;
  }

  // Transfer the KILL status of the conversion's source to the new use; the
  // conversion no longer holds the last use of that register.
  unsigned Op0F = getKillRegState(SrcOp.isKill());
  SrcOp.setIsKill(false);

  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
            .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg());
  LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
  return true;
}
155197

198+
bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI,
199+
MachineBasicBlock *MBB) {
156200
unsigned Op0F = 0;
157201
unsigned Op1F = 0;
202+
auto It = QFPInstMap.find(MI->getOpcode());
203+
if (It == QFPInstMap.end())
204+
return false;
205+
unsigned short InstTy = It->second;
158206
// Get the reaching defs of MI, DefMI1 and DefMI2
159207
MachineInstr *DefMI1 = nullptr;
160208
MachineInstr *DefMI2 = nullptr;
161-
162209
if (MI->getOperand(1).isReg())
163210
DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
164211
if (MI->getOperand(2).isReg())
165212
DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
166213
if (!DefMI1 || !DefMI2)
167214
return false;
168-
169215
MachineOperand &Res = MI->getOperand(0);
216+
if (!Res.isReg())
217+
return false;
170218
MachineInstr *Inst1 = nullptr;
171219
MachineInstr *Inst2 = nullptr;
172220
LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
173221
DefMI2->dump());
174-
175222
// Get the reaching defs of DefMI
176223
if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() &&
177224
DefMI1->getOperand(1).getReg().isVirtual())
178225
Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg());
179-
180226
if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() &&
181227
DefMI2->getOperand(1).getReg().isVirtual())
182228
Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg());
183-
184229
unsigned Def1OP = DefMI1->getOpcode();
185230
unsigned Def2OP = DefMI2->getOpcode();
186-
187231
MachineInstrBuilder MIB;
188-
// Case 1: Both reaching defs of MI are qf to sf/hf conversions
232+
// Check if both reaching defs of MI are qf to sf/hf conversions
189233
if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
190234
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
191235
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
192236
Def2OP == Hexagon::V6_vconv_hf_qf16)) {
193-
194237
// If the reaching defs of DefMI are W register type, we return
195238
if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() &&
196239
MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
@@ -199,17 +242,13 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
199242
MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
200243
&Hexagon::HvxWRRegClass))
201244
return false;
202-
203245
// Analyze the use operands of the conversion to get their KILL status
204246
MachineOperand &Src1 = DefMI1->getOperand(1);
205247
MachineOperand &Src2 = DefMI2->getOperand(1);
206-
207248
Op0F = getKillRegState(Src1.isKill());
208249
Src1.setIsKill(false);
209-
210250
Op1F = getKillRegState(Src2.isKill());
211251
Src2.setIsKill(false);
212-
213252
if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) {
214253
auto OuterIt = QFPInstMap.find(MI->getOpcode());
215254
if (OuterIt == QFPInstMap.end())
@@ -219,28 +258,23 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
219258
return false;
220259
InstTy = InnerIt->second;
221260
}
222-
223261
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
224262
.addReg(Src1.getReg(), Op0F, Src1.getSubReg())
225263
.addReg(Src2.getReg(), Op1F, Src2.getSubReg());
226264
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
227265
return true;
228-
229-
// Case 2: Left operand is conversion to sf/hf
266+
// Check if left operand's reaching def is a conversion to sf/hf
230267
} else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
231268
Def2OP != Hexagon::V6_vconv_sf_qf32) ||
232269
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
233270
Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
234271
!DefMI2->isPHI() &&
235272
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
236-
237273
if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) ==
238274
&Hexagon::HvxWRRegClass)
239275
return false;
240-
241276
MachineOperand &Src1 = DefMI1->getOperand(1);
242277
MachineOperand &Src2 = MI->getOperand(2);
243-
244278
Op0F = getKillRegState(Src1.isKill());
245279
Src1.setIsKill(false);
246280
Op1F = getKillRegState(Src2.isKill());
@@ -249,57 +283,58 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
249283
.addReg(Src2.getReg(), Op1F, Src2.getSubReg());
250284
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
251285
return true;
252-
253-
// Case 2: Left operand is conversion to sf/hf
286+
// Check if right operand's reaching def is a conversion to sf/hf
254287
} else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
255288
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
256289
(Def1OP != Hexagon::V6_vconv_hf_qf16 &&
257290
Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
258291
!DefMI1->isPHI() &&
259292
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
260293
// The second operand of original instruction is converted.
261-
// In "mix" instructions, "qf" operand is always the first operand.
262-
263-
// Caveat: vsub is not commutative w.r.t operands.
264-
if (InstTy == Hexagon::V6_vsub_qf16_mix ||
265-
InstTy == Hexagon::V6_vsub_qf32_mix)
266-
return false;
267-
268294
if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
269295
&Hexagon::HvxWRRegClass)
270296
return false;
271-
272297
MachineOperand &Src1 = MI->getOperand(1);
273298
MachineOperand &Src2 = DefMI2->getOperand(1);
274-
275299
Op1F = getKillRegState(Src2.isKill());
276300
Src2.setIsKill(false);
277301
Op0F = getKillRegState(Src1.isKill());
278-
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
279-
.addReg(Src2.getReg(), Op1F,
280-
Src2.getSubReg()) // Notice the operands are flipped.
281-
.addReg(Src1.getReg(), Op0F, Src1.getSubReg());
302+
if (InstTy == Hexagon::V6_vsub_qf16_mix ||
303+
InstTy == Hexagon::V6_vsub_qf32_mix) {
304+
if (!HST->useHVXV81Ops())
305+
// vsub_(hf|sf)_mix instructions are only available on HVX v81 and later
306+
return false;
307+
// vsub is not commutative w.r.t. operands -> treat it as a special case
308+
// to choose the correct mix instruction.
309+
if (Def2OP == Hexagon::V6_vconv_sf_qf32)
310+
InstTy = Hexagon::V6_vsub_sf_mix;
311+
else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
312+
InstTy = Hexagon::V6_vsub_hf_mix;
313+
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
314+
.addReg(Src1.getReg(), Op0F, Src1.getSubReg())
315+
.addReg(Src2.getReg(), Op1F, Src2.getSubReg());
316+
} else {
317+
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
318+
.addReg(Src2.getReg(), Op1F,
319+
Src2.getSubReg()) // Notice the operands are flipped.
320+
.addReg(Src1.getReg(), Op0F, Src1.getSubReg());
321+
}
282322
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
283323
return true;
284324
}
285-
286325
return false;
287326
}
288327

289328
bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
290-
291329
bool Changed = false;
292-
293330
if (DisableQFOptimizer)
294331
return Changed;
295-
296332
HST = &MF.getSubtarget<HexagonSubtarget>();
297333
if (!HST->useHVXV68Ops() || !HST->usePackets() ||
298334
skipFunction(MF.getFunction()))
299335
return false;
300336
HII = HST->getInstrInfo();
301337
MRI = &MF.getRegInfo();
302-
303338
MachineFunction::iterator MBBI = MF.begin();
304339
LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
305340
<< " Optimize intermediate conversions ===\n");
@@ -309,15 +344,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
309344
while (MII != MBBI->instr_end()) {
310345
MachineInstr *MI = &*MII;
311346
++MII; // As MI might be removed.
312-
313-
if (QFPInstMap.count(MI->getOpcode()) &&
314-
MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
315-
MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
316-
LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
317-
if (optimizeQfp(MI, MBB)) {
318-
MI->eraseFromParent();
319-
LLVM_DEBUG(dbgs() << "\t....Removing....");
320-
Changed = true;
347+
if (QFPInstMap.count(MI->getOpcode())) {
348+
auto OpC = MI->getOpcode();
349+
if (DisableQFOptForMul && HII->isQFPMul(MI))
350+
continue;
351+
if (OpC != Hexagon::V6_vconv_sf_qf32 &&
352+
OpC != Hexagon::V6_vconv_hf_qf16) {
353+
LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: ";MI->dump());
354+
if (optimizeQfp(MI, MBB)) {
355+
MI->eraseFromParent();
356+
LLVM_DEBUG(dbgs() << "\t....Removing....");
357+
Changed = true;
358+
}
321359
}
322360
}
323361
}

0 commit comments

Comments
 (0)