@@ -183,7 +183,6 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
183183 MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
184184 MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
185185 int64_t Offset) {
186- assert (isInt<32 >(Offset) && " Unexpected offset" );
187186 // Put the offset back in Hi and the Lo
188187 Hi20.getOperand (1 ).setOffset (Offset);
189188 Lo12.getOperand (2 ).setOffset (Offset);
@@ -209,22 +208,35 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
209208// instructions and deletes TailAdd and the instructions that produced the
210209// offset.
211210//
212- // Base address lowering is of the form:
213- // Hi20: pcalau12i vreg1, %pc_hi20(s)
214- // Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
215- // / \
216- // / \
217- // / \
218- // / The large offset can be of two forms: \
219- // 1) Offset that has non zero bits in lower 2) Offset that has non zero
220- // 12 bits and upper 20 bits bits in upper 20 bits only
221- // OffsetHi: lu12i.w vreg3, 4
222- // OffsetLo: ori voff, vreg3, 188 OffsetHi: lu12i.w voff, 128
223- // \ /
224- // \ /
225- // \ /
226- // \ /
227- // TailAdd: add.d vreg4, vreg2, voff
211+ // (The instructions marked with "!" are not necessarily present)
212+ //
213+ // Base address lowering is of the form:
214+ // Hi20: pcalau12i vreg1, %pc_hi20(s)
215+ // +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
216+ // | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
217+ // +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
218+ // |
219+ // | The large offset can be one of the forms:
220+ // |
221+ // +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
222+ // | OffsetHi20: lu12i.w vreg3, 4
223+ // | OffsetLo12: ori voff, vreg3, 188 ------------------+
224+ // | |
225+ // +-> 2) Offset that has non zero bits in Hi20 bits only: |
226+ // | OffsetHi20: lu12i.w voff, 128 ------------------+
227+ // | |
228+ // +-> 3) Offset that has non zero bits in Lo20 bits: |
229+ // | OffsetHi20: lu12i.w vreg3, 121 ! |
230+ // | OffsetLo12: ori voff, vreg3, 122 ! |
231+ // | OffsetLo20: lu32i.d voff, 123 ------------------+
232+ // +-> 4) Offset that has non zero bits in Hi12 bits: |
233+ // OffsetHi20: lu12i.w vreg3, 121 ! |
234+ // OffsetLo12: ori voff, vreg3, 122 ! |
235+ // OffsetLo20: lu32i.d vreg3, 123 ! |
236+ // OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+
237+ // |
238+ // TailAdd: add.d vreg4, vreg2, voff <------------------+
239+ //
228240bool LoongArchMergeBaseOffsetOpt::foldLargeOffset (
229241 MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
230242 MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
@@ -235,55 +247,81 @@ bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
235247 Register Rs = TailAdd.getOperand (1 ).getReg ();
236248 Register Rt = TailAdd.getOperand (2 ).getReg ();
237249 Register Reg = Rs == GAReg ? Rt : Rs;
250+ SmallVector<MachineInstr *, 4 > Instrs;
251+ int64_t Offset = 0 ;
252+ int64_t Mask = -1 ;
253+
254+ // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
255+ for (int i = 0 ; i < 4 ; i++) {
256+ // Stop walking the def chain once Reg is R0.
257+ if (Reg == LoongArch::R0)
258+ break ;
238259
239- // Can't fold if the register has more than one use.
240- if (!Reg.isVirtual () || !MRI->hasOneUse (Reg))
241- return false ;
242- // This can point to an ORI or a LU12I.W:
243- MachineInstr &OffsetTail = *MRI->getVRegDef (Reg);
244- if (OffsetTail.getOpcode () == LoongArch::ORI) {
245- // The offset value has non zero bits in both %hi and %lo parts.
246- // Detect an ORI that feeds from a LU12I.W instruction.
247- MachineOperand &OriImmOp = OffsetTail.getOperand (2 );
248- if (OriImmOp.getTargetFlags () != LoongArchII::MO_None)
260+ // Can't fold if the register has more than one use.
261+ if (!Reg.isVirtual () || !MRI->hasOneUse (Reg))
249262 return false ;
250- Register OriReg = OffsetTail.getOperand (1 ).getReg ();
251- int64_t OffLo = OriImmOp.getImm ();
252-
253- // Handle rs1 of ORI is R0.
254- if (OriReg == LoongArch::R0) {
255- LLVM_DEBUG (dbgs () << " Offset Instrs: " << OffsetTail);
256- foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
257- OffsetTail.eraseFromParent ();
258- return true ;
259- }
260263
261- MachineInstr &OffsetLu12i = *MRI->getVRegDef (OriReg);
262- MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand (1 );
263- if (OffsetLu12i.getOpcode () != LoongArch::LU12I_W ||
264- Lu12iImmOp.getTargetFlags () != LoongArchII::MO_None ||
265- !MRI->hasOneUse (OffsetLu12i.getOperand (0 ).getReg ()))
264+ MachineInstr *Curr = MRI->getVRegDef (Reg);
265+ if (!Curr)
266+ break ;
267+
268+ switch (Curr->getOpcode ()) {
269+ default :
270+ // Can't fold if the instruction opcode is unexpected.
266271 return false ;
267- int64_t Offset = SignExtend64<32 >(Lu12iImmOp.getImm () << 12 );
268- Offset += OffLo;
269- // LU12I.W+ORI sign extends the result.
270- Offset = SignExtend64<32 >(Offset);
271- LLVM_DEBUG (dbgs () << " Offset Instrs: " << OffsetTail
272- << " " << OffsetLu12i);
273- foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
274- OffsetTail.eraseFromParent ();
275- OffsetLu12i.eraseFromParent ();
276- return true ;
277- } else if (OffsetTail.getOpcode () == LoongArch::LU12I_W) {
278- // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
279- // exists.
280- LLVM_DEBUG (dbgs () << " Offset Instr: " << OffsetTail);
281- int64_t Offset = SignExtend64<32 >(OffsetTail.getOperand (1 ).getImm () << 12 );
282- foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
283- OffsetTail.eraseFromParent ();
284- return true ;
272+ case LoongArch::ORI: {
273+ MachineOperand ImmOp = Curr->getOperand (2 );
274+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None)
275+ return false ;
276+ Offset += ImmOp.getImm ();
277+ Reg = Curr->getOperand (1 ).getReg ();
278+ Instrs.push_back (Curr);
279+ break ;
280+ }
281+ case LoongArch::LU12I_W: {
282+ MachineOperand ImmOp = Curr->getOperand (1 );
283+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None)
284+ return false ;
285+ Offset += SignExtend64<32 >(ImmOp.getImm () << 12 ) & Mask;
286+ Reg = LoongArch::R0;
287+ Instrs.push_back (Curr);
288+ break ;
289+ }
290+ case LoongArch::LU32I_D: {
291+ MachineOperand ImmOp = Curr->getOperand (2 );
292+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None || !Lo20)
293+ return false ;
294+ Offset += SignExtend64<52 >(ImmOp.getImm () << 32 ) & Mask;
295+ Mask ^= 0x000FFFFF00000000ULL ;
296+ Reg = Curr->getOperand (1 ).getReg ();
297+ Instrs.push_back (Curr);
298+ break ;
299+ }
300+ case LoongArch::LU52I_D: {
301+ MachineOperand ImmOp = Curr->getOperand (2 );
302+ if (ImmOp.getTargetFlags () != LoongArchII::MO_None || !Hi12)
303+ return false ;
304+ Offset += ImmOp.getImm () << 52 ;
305+ Mask ^= 0xFFF0000000000000ULL ;
306+ Reg = Curr->getOperand (1 ).getReg ();
307+ Instrs.push_back (Curr);
308+ break ;
309+ }
310+ }
285311 }
286- return false ;
312+
313+ // Can't fold if the offset is not extracted.
314+ if (!Offset)
315+ return false ;
316+
317+ foldOffset (Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
318+ LLVM_DEBUG (dbgs () << " Offset Instrs:\n " );
319+ for (auto I : Instrs) {
320+ LLVM_DEBUG (dbgs () << " " << *I);
321+ I->eraseFromParent ();
322+ }
323+
324+ return true ;
287325}
288326
289327bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset (MachineInstr &Hi20,
@@ -344,13 +382,6 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
344382 [[fallthrough]];
345383 case LoongArch::ADD_D:
346384 // The offset is too large to fit in the immediate field of ADDI.
347- // This can be in two forms:
348- // 1) LU12I.W hi_offset followed by:
349- // ORI lo_offset
350- // This happens in case the offset has non zero bits in
351- // both hi 20 and lo 12 bits.
352- // 2) LU12I.W (offset20)
353- // This happens in case the lower 12 bits of the offset are zeros.
354385 return foldLargeOffset (Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
355386 break ;
356387 }
0 commit comments