88//
99// This file implements a pre-ra pass to optimize SELECT instructions. It tries
1010// to fold SELECT with its user in case one of the input operands of SELECT
11- // is immediate zero and it tries to fold ADD with SELECT if one of the inputs
12- // of ADD matches with the input of SELECT.
11+ // is immediate zero and it tries to fold arithmetic and bitwise instructions
12+ // with SELECT if one of the inputs of these instructions matches with the input
13+ // of SELECT.
1314//
1415// ===----------------------------------------------------------------------===//
1516
@@ -82,6 +83,34 @@ class EraVMOptimizeSelectPreRA : public MachineFunctionPass {
8283 Register In1, unsigned NewOpcode, unsigned NewCC,
8384 Register TieReg) const ;
8485
86+ // / Fold MI into SelectMI and return the folded instruction. To do this,
87+ // / we clone MI and place the new instruction just before the select,
88+ // / replace its output with the output register of the SelectMI
89+ // / instruction, update the condition code to NewCC, tie TieReg
90+ // / to the output register and add implicit flags because we are
91+ // / updating the condition code.
92+ // /
93+ // / Example:
94+ // /
95+ // / folding
96+ // / %2:gr256 = ADDrrr_s %0:gr256, %1:gr256, i256 0
97+ // / %4:gr256 = SELrrr %2:gr256, %0:gr256, i256 2, implicit $flags
98+ // / into
99+ // / %4:gr256 = ADDrrr_s %0:gr256, %1:gr256, 2,
100+ // / implicit %0:gr256(tied-def 0), implicit $flags
101+ // /
102+ MachineInstr *foldToSelect (MachineInstr &MI, MachineInstr &SelectMI,
103+ unsigned NewCC, Register TieReg) const ;
104+
105+ // / Return the output register of a folding-candidate MI. In case MI has two
106+ // / outputs, we can only fold the instruction iff one output is used and the
107+ // / other is not.
108+ // / In case both outputs are used and one of them is used by a SELECT,
109+ // / folding would make this instruction execute conditionally, meaning
110+ // / that the other output would be calculated iff the condition is met, thus
111+ // / the folded instruction would not always produce the expected results; in that case EraVM::NoRegister is returned.
112+ Register getOutRegToFold (const MachineInstr &MI) const ;
113+
85114 // / In case one of the input operand of SELECT is immediate zero, then it is
86115 // / possible to fold it with its sole user to result in just one folded
87116 // / instruction.
@@ -98,9 +127,10 @@ class EraVMOptimizeSelectPreRA : public MachineFunctionPass {
98127 // / if z and y can be allocated to same reg. The tie is used to ensure this.
99128 bool tryFoldSelectZero (MachineBasicBlock &MBB);
100129
101- // / In case ADD is fed into a SELECT, and one of the inputs of ADD matches
102- // / with the input of SELECT, then it is possible to fold ADD with SELECT
103- // / to result in just one folded instruction.
130+ // / In case an arithmetic or a bitwise instruction is fed into a SELECT, and
131+ // / one of the inputs of that instruction matches an input of the SELECT,
132+ // / then it is possible to fold the instruction with the SELECT to result in
133+ // / just one folded instruction.
104134 // /
105135 // / A typical case is like below:
106136 // /
@@ -113,7 +143,7 @@ class EraVMOptimizeSelectPreRA : public MachineFunctionPass {
113143 // /
114144 // / if outSEL and x/y can be allocated to same reg.
115145 // / The tie is used to ensure this.
116- bool tryFoldAddToSelect (MachineBasicBlock &MBB);
146+ bool tryFoldToSelect (MachineBasicBlock &MBB);
117147};
118148
119149char EraVMOptimizeSelectPreRA::ID = 0 ;
@@ -208,6 +238,67 @@ MachineInstr *EraVMOptimizeSelectPreRA::getFoldedInst(
208238 return NewMI;
209239}
210240
241+ MachineInstr *EraVMOptimizeSelectPreRA::foldToSelect (MachineInstr &MI,
242+ MachineInstr &SelectMI,
243+ unsigned NewCC,
244+ Register TieReg) const {
245+ // Clone the original instruction; every edit below is applied to the clone.
246+ auto &MF = *MI.getMF ();
247+ auto *NewMI = MF.CloneMachineInstr (&MI);
248+
249+ // Insert the clone right before the SELECT instruction, in the SELECT's block.
250+ SelectMI.getParent ()->insert (SelectMI, NewMI);
251+
252+ Register OutReg = getOutRegToFold (*NewMI); // The output of the clone that will be redirected.
253+ assert (OutReg != EraVM::NoRegister && " Unexpected output register." );
254+
255+ // Find the def operand of the clone that currently defines OutReg.
256+ auto *DefMOToUpdate =
257+ llvm::find_if (NewMI->defs (), [OutReg](MachineOperand &MO) {
258+ return MO.getReg () == OutReg;
259+ });
260+ assert (DefMOToUpdate != NewMI->defs ().end () &&
261+ " Didn't find the def register to update." );
262+
263+ // Change that def register to the register in operand 0 of the select instruction.
264+ DefMOToUpdate->setReg (SelectMI.getOperand (0 ).getReg ());
265+
266+ // Remember the operand index of that def; it is tied to TieReg below.
267+ unsigned DefIdxToTie = DefMOToUpdate->getOperandNo ();
268+
269+ EraVM::ccIterator (*NewMI)->ChangeToImmediate (NewCC); // Set the condition-code operand to NewCC.
270+ MachineInstrBuilder (MF, NewMI)
271+ .addReg (TieReg, RegState::Implicit)
272+ .addReg (EraVM::Flags, RegState::Implicit); // Added in this order: TieReg first, then flags.
273+
274+ // Tie the updated def to the implicit TieReg operand (added just before the
275+ // flags operand, hence getNumOperands () - 2) to ensure those two operands get
276+ // the same reg after RA pass. This is the key to making the transformation in this pass correct.
277+ NewMI->tieOperands (DefIdxToTie, NewMI->getNumOperands () - 2 );
278+ return NewMI;
279+ }
280+
281+ Register
282+ EraVMOptimizeSelectPreRA::getOutRegToFold (const MachineInstr &MI) const {
283+ assert ((MI.getNumExplicitDefs () == 1 || MI.getNumExplicitDefs () == 2 ) &&
284+ " Unexpected number of register outputs" );
285+
286+ if (MI.getNumExplicitDefs () == 2 ) {
287+ // In case both outputs have non-debug uses, bail out by returning NoRegister.
288+ if (!RegInfo->use_nodbg_empty (EraVM::out0Iterator (MI)->getReg ()) &&
289+ !RegInfo->use_nodbg_empty (EraVM::out1Iterator (MI)->getReg ()))
290+ return EraVM::NoRegister;
291+
292+ // Return out1 when out0 has no non-debug uses, otherwise out0 (so out1 is also returned when neither output is used).
293+ return RegInfo->use_nodbg_empty (EraVM::out0Iterator (MI)->getReg ())
294+ ? EraVM::out1Iterator (MI)->getReg ()
295+ : EraVM::out0Iterator (MI)->getReg ();
296+ }
297+
298+ // Single register output: return that output (out0).
299+ return EraVM::out0Iterator (MI)->getReg ();
300+ }
301+
211302bool EraVMOptimizeSelectPreRA::tryFoldSelectZero (MachineBasicBlock &MBB) {
212303 SmallPtrSet<MachineInstr *, 4 > ToRemove;
213304 for (auto &MI : MBB) {
@@ -297,58 +388,59 @@ bool EraVMOptimizeSelectPreRA::tryFoldSelectZero(MachineBasicBlock &MBB) {
297388 return !ToRemove.empty ();
298389}
299390
300- bool EraVMOptimizeSelectPreRA::tryFoldAddToSelect (MachineBasicBlock &MBB) {
391+ bool EraVMOptimizeSelectPreRA::tryFoldToSelect (MachineBasicBlock &MBB) {
301392 SmallVector<MachineInstr *, 16 > ToRemove;
302393 SmallPtrSet<MachineInstr *, 16 > UsesToUpdate;
303394
304- // 1. Collect all instructions to be combined.
305395 for (auto &MI : MBB) {
306- if (!TII->isAdd (MI) || TII->getCCCode (MI) != EraVMCC::COND_NONE ||
396+ if ((!TII->isArithmetic (MI) && !TII->isBitwise (MI)) ||
397+ TII->getCCCode (MI) != EraVMCC::COND_NONE ||
307398 EraVMInstrInfo::isFlagSettingInstruction (MI) ||
308399 !EraVM::hasRROutAddressingMode (MI))
309400 continue ;
310401
311- // It's expected that if there are more uses of add, it's very unlikely that
402+ // If there is more than one output, we have to make sure the
403+ // other output is not used before we can fold this instruction.
404+ Register OutReg = getOutRegToFold (MI);
405+ if (OutReg == EraVM::NoRegister)
406+ continue ;
407+
408+ // It's expected that if there are more uses, it's very unlikely that
312409 // all of them are select instruction where folding is feasible.
313- Register OutAddReg = EraVM::out0Iterator (MI)->getReg ();
314- if (!RegInfo->hasOneNonDBGUser (OutAddReg))
410+ if (!RegInfo->hasOneNonDBGUser (OutReg))
315411 continue ;
316412
317- MachineInstr &UseMI = *RegInfo->use_instr_nodbg_begin (OutAddReg );
413+ MachineInstr &UseMI = *RegInfo->use_instr_nodbg_begin (OutReg );
318414 if (UsesToUpdate.count (&UseMI))
319415 continue ;
320416
321- SmallSet<Register, 2 > InAddRegs ;
322- InAddRegs .insert (EraVM::in1Iterator (MI)->getReg ());
417+ SmallSet<Register, 2 > InRegs ;
418+ InRegs .insert (EraVM::in1Iterator (MI)->getReg ());
323419 if (EraVM::hasRRInAddressingMode (MI))
324- InAddRegs .insert (EraVM::in0Iterator (MI)->getReg ());
420+ InRegs .insert (EraVM::in0Iterator (MI)->getReg ());
325421
326- // In order to fold add to select , we expect that other input of a select
327- // instruction is matching with one of the add inputs.
422+ // In order to fold, we expect that other input of a select instruction
423+ // is matching with one of the MI inputs.
328424 if (UseMI.getOpcode () != EraVM::SELrrr ||
329- (!InAddRegs .count (EraVM::in0Iterator (UseMI)->getReg ()) &&
330- !InAddRegs .count (EraVM::in1Iterator (UseMI)->getReg ())))
425+ (!InRegs .count (EraVM::in0Iterator (UseMI)->getReg ()) &&
426+ !InRegs .count (EraVM::in1Iterator (UseMI)->getReg ())))
331427 continue ;
332428
333- bool OutAddIsIn1Use = EraVM::out0Iterator (MI)->getReg () ==
334- EraVM::in1Iterator (UseMI)->getReg ();
429+ bool OutIsIn1Use = OutReg == EraVM::in1Iterator (UseMI)->getReg ();
335430 auto CC = getImmOrCImm (*EraVM::ccIterator (UseMI));
336431
337432 // The COND_OF is overflow LT which hasn't reversal version, so we don't
338- // attempt to inverse it.
339- if (OutAddIsIn1Use && CC == EraVMCC::COND_OF)
433+ // attempt to inverse it.
434+ if (OutIsIn1Use && CC == EraVMCC::COND_OF)
340435 continue ;
341436
342- auto CCNewMI = OutAddIsIn1Use ? InverseCond[CC] : CC;
343- Register TieReg = OutAddIsIn1Use ? EraVM::in0Iterator (UseMI)->getReg ()
344- : EraVM::in1Iterator (UseMI)->getReg ();
437+ auto CCNewMI = OutIsIn1Use ? InverseCond[CC] : CC;
438+ Register TieReg = OutIsIn1Use ? EraVM::in0Iterator (UseMI)->getReg ()
439+ : EraVM::in1Iterator (UseMI)->getReg ();
345440
346- [[maybe_unused]] auto *NewMI = getFoldedInst (
347- UseMI, MI, EraVM::in0Range (MI), EraVM::in1Iterator (MI)->getReg (),
348- MI.getOpcode (), CCNewMI, TieReg);
349- LLVM_DEBUG (dbgs () << " == Folding add:" ; MI.dump ();
350- dbgs () << " and use:" ; UseMI.dump ();
351- dbgs () << " into:" ; NewMI->dump (););
441+ [[maybe_unused]] auto *NewMI = foldToSelect (MI, UseMI, CCNewMI, TieReg);
442+ LLVM_DEBUG (dbgs () << " == Folding:" ; MI.dump (); dbgs () << " and use:" ;
443+ UseMI.dump (); dbgs () << " into:" ; NewMI->dump (););
352444
353445 UsesToUpdate.insert (&UseMI);
354446 ToRemove.emplace_back (&MI);
@@ -373,7 +465,7 @@ bool EraVMOptimizeSelectPreRA::runOnMachineFunction(MachineFunction &MF) {
373465 bool Changed = false ;
374466 for (MachineBasicBlock &MBB : MF) {
375467 Changed |= tryFoldSelectZero (MBB);
376- Changed |= tryFoldAddToSelect (MBB);
468+ Changed |= tryFoldToSelect (MBB);
377469 }
378470 return Changed;
379471}
0 commit comments