@@ -426,28 +426,54 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
426426static void applyBitsNotInRegMaskToRegUnitsMask (const TargetRegisterInfo &TRI,
427427 BitVector &RUs,
428428 const uint32_t *Mask) {
429- BitVector ClobberedRUs (TRI.getNumRegUnits (), true );
429+ // FIXME: This intentionally works in reverse due to some issues with the
430+ // Register Units infrastructure.
431+ //
432+ // This is used to apply callee-saved-register masks to the clobbered regunits
433+ // mask.
434+ //
435+ // The right way to approach this is to start with a BitVector full of ones,
436+ // then reset all the bits of the regunits of each register that is set in the
437+ // mask (registers preserved), then OR the resulting bits with the Clobbers
438+ // mask. This correctly prioritizes the saved registers, so if a RU is shared
439+ // between a register that is preserved, and one that is NOT preserved, that
440+ // RU will not be set in the output vector (the clobbers).
441+ //
442+ // What we have to do for now is the opposite: we have to assume that the
443+ // regunits of all registers that are NOT preserved are clobbered, even if
444+ // those regunits are preserved by another register. So if a RU is shared
445+ // like described previously, that RU will be set.
446+ //
447+ // This is to work around an issue which appears in AArch64, but isn't
448+ // exclusive to that target: AArch64's Qn registers (128 bits) have Dn
449+ // register (lower 64 bits). A few Dn registers are preserved by some calling
450+ // conventions, but Qn and Dn share exactly the same reg units.
451+ //
452+ // If we do this the right way, Qn will be marked as NOT clobbered even though
453+ // its upper 64 bits are NOT preserved. The conservative approach handles this
454+ // correctly at the cost of some missed optimizations on other targets.
455+ //
456+ // This is caused by how RegUnits are handled within TableGen. Ideally, Qn
457+ // should have an extra RegUnit to model the "unknown" bits not covered by the
458+ // subregs.
459+ BitVector RUsFromRegsNotInMask (TRI.getNumRegUnits ());
430460 const unsigned NumRegs = TRI.getNumRegs ();
431461 const unsigned MaskWords = (NumRegs + 31 ) / 32 ;
432462 for (unsigned K = 0 ; K < MaskWords; ++K) {
433463 const uint32_t Word = Mask[K];
434- if (!Word)
435- continue ;
436-
437464 for (unsigned Bit = 0 ; Bit < 32 ; ++Bit) {
438465 const unsigned PhysReg = (K * 32 ) + Bit;
439466 if (PhysReg == NumRegs)
440467 break ;
441468
442- // Check if we have a valid PhysReg that is set in the mask.
443- if ((Word >> Bit) & 1 ) {
469+ if (PhysReg && !((Word >> Bit) & 1 )) {
444470 for (MCRegUnitIterator RUI (PhysReg, &TRI); RUI.isValid (); ++RUI)
445- ClobberedRUs. reset (*RUI);
471+ RUsFromRegsNotInMask. set (*RUI);
446472 }
447473 }
448474 }
449475
450- RUs |= ClobberedRUs ;
476+ RUs |= RUsFromRegsNotInMask ;
451477}
452478
453479/// Examine the instruction for potential LICM candidate. Also
0 commit comments