5858// are PHI inst.
5959//
6060// ===----------------------------------------------------------------------===//
61- # include < unordered_set >
61+
6262#define HEXAGON_QFP_OPTIMIZER " QFP optimizer pass"
6363
6464#include " Hexagon.h"
@@ -86,6 +86,10 @@ using namespace llvm;
8686cl::opt<bool >
8787 DisableQFOptimizer (" disable-qfp-opt" , cl::init(false ),
8888 cl::desc(" Disable optimization of Qfloat operations." ));
// Command-line switch that excludes QFP multiply instructions from this pass.
// It is initialized to true, so runOnMachineFunction skips every instruction
// for which HII->isQFPMul(MI) holds unless the option is explicitly cleared.
89+ cl::opt<bool >
90+ DisableQFOptForMul (" disable-qfp-opt-mul" , cl::init(true ),
91+ cl::desc(
92+ " Disable optimization of Qfloat operations for multiply." ));
8993
9094namespace {
9195const std::map<unsigned short , unsigned short > QFPInstMap{
@@ -101,96 +105,135 @@ const std::map<unsigned short, unsigned short> QFPInstMap{
101105 {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
102106 {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
103107 {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
104- {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
108+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
109+ {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
110+ {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
111+ {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
112+ {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
113+ {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
114+ {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
105115} // namespace
106116
107- namespace {
// Forward declarations for the pass factory (defined later in this file) and
// the pass-registry initializer (presumably provided by the INITIALIZE_PASS
// invocation below -- TODO confirm).
117+ namespace llvm {
118+ FunctionPass *createHexagonQFPOptimizer ();
119+ void initializeHexagonQFPOptimizerPass (PassRegistry &);
120+ } // namespace llvm
108121
122+ namespace {
109123struct HexagonQFPOptimizer : public MachineFunctionPass {
110124public:
111125 static char ID;
112-
113126 HexagonQFPOptimizer () : MachineFunctionPass(ID) {}
114-
115127 bool runOnMachineFunction (MachineFunction &MF) override ;
116-
117128 bool optimizeQfp (MachineInstr *MI, MachineBasicBlock *MBB);
118-
129+ bool optimizeQfpTwoOp (MachineInstr *MI, MachineBasicBlock *MBB);
130+ bool optimizeQfpOneOp (MachineInstr *MI, MachineBasicBlock *MBB);
119131 StringRef getPassName () const override { return HEXAGON_QFP_OPTIMIZER; }
120-
121132 void getAnalysisUsage (AnalysisUsage &AU) const override {
122133 AU.setPreservesCFG ();
123134 MachineFunctionPass::getAnalysisUsage (AU);
124135 }
125-
126136private:
127137 const HexagonSubtarget *HST = nullptr ;
128138 const HexagonInstrInfo *HII = nullptr ;
129139 const MachineRegisterInfo *MRI = nullptr ;
130140};
131-
132141char HexagonQFPOptimizer::ID = 0 ;
133142} // namespace
134-
// Pass registration: associates HexagonQFPOptimizer with its pass-argument
// string and the HEXAGON_QFP_OPTIMIZER description. The two trailing `false`
// arguments are presumably the CFG-only and is-analysis flags -- TODO confirm
// against the INITIALIZE_PASS macro definition.
135143INITIALIZE_PASS (HexagonQFPOptimizer, " hexagon-qfp-optimizer" ,
136144 HEXAGON_QFP_OPTIMIZER, false , false )
137-
/// Factory for the QFP optimizer pass: returns a newly heap-allocated
/// HexagonQFPOptimizer as a FunctionPass pointer.
138145FunctionPass *llvm::createHexagonQFPOptimizer() {
139146 return new HexagonQFPOptimizer ();
140147}
141148
142149bool HexagonQFPOptimizer::optimizeQfp (MachineInstr *MI,
143150 MachineBasicBlock *MBB) {
144-
145- // Early exit:
146- // - if instruction is invalid or has too few operands (QFP ops need 2 sources
147- // + 1 dest),
148- // - or does not have a transformation mapping.
149- if (MI->getNumOperands () < 3 )
151+ if (MI->getNumOperands () == 2 )
152+ return optimizeQfpOneOp (MI, MBB);
153+ else if (MI->getNumOperands () == 3 )
154+ return optimizeQfpTwoOp (MI, MBB);
155+ else
150156 return false ;
157+ }
158+
159+ bool HexagonQFPOptimizer::optimizeQfpOneOp (MachineInstr *MI,
160+ MachineBasicBlock *MBB) {
161+ unsigned Op0F = 0 ;
151162 auto It = QFPInstMap.find (MI->getOpcode ());
152163 if (It == QFPInstMap.end ())
153164 return false ;
154165 unsigned short InstTy = It->second ;
166+ // Get the reachind defs of MI
167+ MachineInstr *DefMI = MRI->getVRegDef (MI->getOperand (1 ).getReg ());
168+ MachineOperand &Res = MI->getOperand (0 );
169+ if (!Res.isReg ())
170+ return false ;
171+ LLVM_DEBUG (dbgs () << " \n [Reaching Defs of operands]: " ; DefMI->dump ());
172+ MachineInstr *ReachDefDef = nullptr ;
173+ // Get the reaching def of the reaching def to check for W reg def
174+ if (DefMI->getNumOperands () > 1 && DefMI->getOperand (1 ).isReg () &&
175+ DefMI->getOperand (1 ).getReg ().isVirtual ())
176+ ReachDefDef = MRI->getVRegDef (DefMI->getOperand (1 ).getReg ());
177+ unsigned ReachDefOp = DefMI->getOpcode ();
178+ MachineInstrBuilder MIB;
179+ // Check if the reaching def is a conversion
180+ if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 ||
181+ ReachDefOp == Hexagon::V6_vconv_hf_qf16) {
182+ // Return if the reaching def of reaching def is W type
183+ if (ReachDefDef && MRI->getRegClass (ReachDefDef->getOperand (0 ).getReg ()) ==
184+ &Hexagon::HvxWRRegClass)
185+ return false ;
186+ // Analyze the use operands of the conversion to get their KILL status
187+ MachineOperand &SrcOp = DefMI->getOperand (1 );
188+ Op0F = getKillRegState (SrcOp.isKill ());
189+ SrcOp.setIsKill (false );
190+ MIB = BuildMI (*MBB, MI, MI->getDebugLoc (), HII->get (InstTy), Res.getReg ())
191+ .addReg (SrcOp.getReg (), Op0F, SrcOp.getSubReg ());
192+ LLVM_DEBUG (dbgs () << " \n [Inserting]: " ; MIB.getInstr ()->dump ());
193+ return true ;
194+ }
195+ return false ;
196+ }
155197
198+ bool HexagonQFPOptimizer::optimizeQfpTwoOp (MachineInstr *MI,
199+ MachineBasicBlock *MBB) {
156200 unsigned Op0F = 0 ;
157201 unsigned Op1F = 0 ;
202+ auto It = QFPInstMap.find (MI->getOpcode ());
203+ if (It == QFPInstMap.end ())
204+ return false ;
205+ unsigned short InstTy = It->second ;
158206 // Get the reaching defs of MI, DefMI1 and DefMI2
159207 MachineInstr *DefMI1 = nullptr ;
160208 MachineInstr *DefMI2 = nullptr ;
161-
162209 if (MI->getOperand (1 ).isReg ())
163210 DefMI1 = MRI->getVRegDef (MI->getOperand (1 ).getReg ());
164211 if (MI->getOperand (2 ).isReg ())
165212 DefMI2 = MRI->getVRegDef (MI->getOperand (2 ).getReg ());
166213 if (!DefMI1 || !DefMI2)
167214 return false ;
168-
169215 MachineOperand &Res = MI->getOperand (0 );
216+ if (!Res.isReg ())
217+ return false ;
170218 MachineInstr *Inst1 = nullptr ;
171219 MachineInstr *Inst2 = nullptr ;
172220 LLVM_DEBUG (dbgs () << " \n [Reaching Defs of operands]: " ; DefMI1->dump ();
173221 DefMI2->dump ());
174-
175222 // Get the reaching defs of DefMI
176223 if (DefMI1->getNumOperands () > 1 && DefMI1->getOperand (1 ).isReg () &&
177224 DefMI1->getOperand (1 ).getReg ().isVirtual ())
178225 Inst1 = MRI->getVRegDef (DefMI1->getOperand (1 ).getReg ());
179-
180226 if (DefMI2->getNumOperands () > 1 && DefMI2->getOperand (1 ).isReg () &&
181227 DefMI2->getOperand (1 ).getReg ().isVirtual ())
182228 Inst2 = MRI->getVRegDef (DefMI2->getOperand (1 ).getReg ());
183-
184229 unsigned Def1OP = DefMI1->getOpcode ();
185230 unsigned Def2OP = DefMI2->getOpcode ();
186-
187231 MachineInstrBuilder MIB;
188- // Case 1: Both reaching defs of MI are qf to sf/hf conversions
232+ // Check if both reaching defs of MI are qf to sf/hf conversions
189233 if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
190234 Def2OP == Hexagon::V6_vconv_sf_qf32) ||
191235 (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
192236 Def2OP == Hexagon::V6_vconv_hf_qf16)) {
193-
194237 // If the reaching defs of DefMI are W register type, we return
195238 if ((Inst1 && Inst1->getNumOperands () > 0 && Inst1->getOperand (0 ).isReg () &&
196239 MRI->getRegClass (Inst1->getOperand (0 ).getReg ()) ==
@@ -199,17 +242,13 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
199242 MRI->getRegClass (Inst2->getOperand (0 ).getReg ()) ==
200243 &Hexagon::HvxWRRegClass))
201244 return false ;
202-
203245 // Analyze the use operands of the conversion to get their KILL status
204246 MachineOperand &Src1 = DefMI1->getOperand (1 );
205247 MachineOperand &Src2 = DefMI2->getOperand (1 );
206-
207248 Op0F = getKillRegState (Src1.isKill ());
208249 Src1.setIsKill (false );
209-
210250 Op1F = getKillRegState (Src2.isKill ());
211251 Src2.setIsKill (false );
212-
213252 if (MI->getOpcode () != Hexagon::V6_vmpy_qf32_sf) {
214253 auto OuterIt = QFPInstMap.find (MI->getOpcode ());
215254 if (OuterIt == QFPInstMap.end ())
@@ -219,28 +258,23 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
219258 return false ;
220259 InstTy = InnerIt->second ;
221260 }
222-
223261 MIB = BuildMI (*MBB, MI, MI->getDebugLoc (), HII->get (InstTy), Res.getReg ())
224262 .addReg (Src1.getReg (), Op0F, Src1.getSubReg ())
225263 .addReg (Src2.getReg (), Op1F, Src2.getSubReg ());
226264 LLVM_DEBUG (dbgs () << " \n [Inserting]: " ; MIB.getInstr ()->dump ());
227265 return true ;
228-
229- // Case 2: Left operand is conversion to sf/hf
266+ // Check if left operand's reaching def is a conversion to sf/hf
230267 } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
231268 Def2OP != Hexagon::V6_vconv_sf_qf32) ||
232269 (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
233270 Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
234271 !DefMI2->isPHI () &&
235272 (MI->getOpcode () != Hexagon::V6_vmpy_qf32_sf)) {
236-
237273 if (Inst1 && MRI->getRegClass (Inst1->getOperand (0 ).getReg ()) ==
238274 &Hexagon::HvxWRRegClass)
239275 return false ;
240-
241276 MachineOperand &Src1 = DefMI1->getOperand (1 );
242277 MachineOperand &Src2 = MI->getOperand (2 );
243-
244278 Op0F = getKillRegState (Src1.isKill ());
245279 Src1.setIsKill (false );
246280 Op1F = getKillRegState (Src2.isKill ());
@@ -249,57 +283,58 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
249283 .addReg (Src2.getReg (), Op1F, Src2.getSubReg ());
250284 LLVM_DEBUG (dbgs () << " \n [Inserting]: " ; MIB.getInstr ()->dump ());
251285 return true ;
252-
253- // Case 2: Left operand is conversion to sf/hf
286+ // Check if right operand's reaching def is a conversion to sf/hf
254287 } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
255288 Def2OP == Hexagon::V6_vconv_sf_qf32) ||
256289 (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
257290 Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
258291 !DefMI1->isPHI () &&
259292 (MI->getOpcode () != Hexagon::V6_vmpy_qf32_sf)) {
260293 // The second operand of original instruction is converted.
261- // In "mix" instructions, "qf" operand is always the first operand.
262-
263- // Caveat: vsub is not commutative w.r.t operands.
264- if (InstTy == Hexagon::V6_vsub_qf16_mix ||
265- InstTy == Hexagon::V6_vsub_qf32_mix)
266- return false ;
267-
268294 if (Inst2 && MRI->getRegClass (Inst2->getOperand (0 ).getReg ()) ==
269295 &Hexagon::HvxWRRegClass)
270296 return false ;
271-
272297 MachineOperand &Src1 = MI->getOperand (1 );
273298 MachineOperand &Src2 = DefMI2->getOperand (1 );
274-
275299 Op1F = getKillRegState (Src2.isKill ());
276300 Src2.setIsKill (false );
277301 Op0F = getKillRegState (Src1.isKill ());
278- MIB = BuildMI (*MBB, MI, MI->getDebugLoc (), HII->get (InstTy), Res.getReg ())
279- .addReg (Src2.getReg (), Op1F,
280- Src2.getSubReg ()) // Notice the operands are flipped.
281- .addReg (Src1.getReg (), Op0F, Src1.getSubReg ());
302+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
303+ InstTy == Hexagon::V6_vsub_qf32_mix) {
304+ if (!HST->useHVXV81Ops ())
305+ // vsub_(hf|sf)_mix insts are only available on hvx81+
306+ return false ;
307+ // vsub is not commutative w.r.t. operands -> treat it as a special case
308+ // to choose the correct mix instruction.
309+ if (Def2OP == Hexagon::V6_vconv_sf_qf32)
310+ InstTy = Hexagon::V6_vsub_sf_mix;
311+ else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
312+ InstTy = Hexagon::V6_vsub_hf_mix;
313+ MIB = BuildMI (*MBB, MI, MI->getDebugLoc (), HII->get (InstTy), Res.getReg ())
314+ .addReg (Src1.getReg (), Op0F, Src1.getSubReg ())
315+ .addReg (Src2.getReg (), Op1F, Src2.getSubReg ());
316+ } else {
317+ MIB = BuildMI (*MBB, MI, MI->getDebugLoc (), HII->get (InstTy), Res.getReg ())
318+ .addReg (Src2.getReg (), Op1F,
319+ Src2.getSubReg ()) // Notice the operands are flipped.
320+ .addReg (Src1.getReg (), Op0F, Src1.getSubReg ());
321+ }
282322 LLVM_DEBUG (dbgs () << " \n [Inserting]: " ; MIB.getInstr ()->dump ());
283323 return true ;
284324 }
285-
286325 return false ;
287326}
288327
289328bool HexagonQFPOptimizer::runOnMachineFunction (MachineFunction &MF) {
290-
291329 bool Changed = false ;
292-
293330 if (DisableQFOptimizer)
294331 return Changed;
295-
296332 HST = &MF.getSubtarget <HexagonSubtarget>();
297333 if (!HST->useHVXV68Ops () || !HST->usePackets () ||
298334 skipFunction (MF.getFunction ()))
299335 return false ;
300336 HII = HST->getInstrInfo ();
301337 MRI = &MF.getRegInfo ();
302-
303338 MachineFunction::iterator MBBI = MF.begin ();
304339 LLVM_DEBUG (dbgs () << " \n === Running QFPOptimzer Pass for : " << MF.getName ()
305340 << " Optimize intermediate conversions ===\n " );
@@ -309,15 +344,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
309344 while (MII != MBBI->instr_end ()) {
310345 MachineInstr *MI = &*MII;
311346 ++MII; // As MI might be removed.
312-
313- if (QFPInstMap.count (MI->getOpcode ()) &&
314- MI->getOpcode () != Hexagon::V6_vconv_sf_qf32 &&
315- MI->getOpcode () != Hexagon::V6_vconv_hf_qf16) {
316- LLVM_DEBUG (dbgs () << " \n ###Analyzing for removal: " ; MI->dump ());
317- if (optimizeQfp (MI, MBB)) {
318- MI->eraseFromParent ();
319- LLVM_DEBUG (dbgs () << " \t ....Removing...." );
320- Changed = true ;
347+ if (QFPInstMap.count (MI->getOpcode ())) {
348+ auto OpC = MI->getOpcode ();
349+ if (DisableQFOptForMul && HII->isQFPMul (MI))
350+ continue ;
351+ if (OpC != Hexagon::V6_vconv_sf_qf32 &&
352+ OpC != Hexagon::V6_vconv_hf_qf16) {
353+ LLVM_DEBUG (dbgs () << " \n ###Analyzing for removal: " ;MI->dump ());
354+ if (optimizeQfp (MI, MBB)) {
355+ MI->eraseFromParent ();
356+ LLVM_DEBUG (dbgs () << " \t ....Removing...." );
357+ Changed = true ;
358+ }
321359 }
322360 }
323361 }
0 commit comments