@@ -20175,53 +20175,30 @@ static bool optimizeLogicalImm(SDValue Op, unsigned Imm, const APInt &Demanded,
2017520175 // Clear bits that are not demanded.
2017620176 Imm &= DemandedBits;
2017720177
20178- while (true) {
20179- // The goal here is to set the non-demanded bits in a way that minimizes
20180- // the number of switching between 0 and 1. In order to achieve this goal,
20181- // we set the non-demanded bits to the value of the preceding demanded bits.
20182- // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
20183- // non-demanded bit), we copy bit0 (1) to the least significant 'x',
20184- // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
20185- // The final result is 0b11000011.
20186- unsigned NonDemandedBits = ~DemandedBits;
20187- unsigned InvertedImm = ~Imm & DemandedBits;
20188- unsigned RotatedImm =
20189- ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
20190- NonDemandedBits;
20191- unsigned Sum = RotatedImm + NonDemandedBits;
20192- bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
20193- unsigned Ones = (Sum + Carry) & NonDemandedBits;
20194- NewImm = (Imm | Ones) & Mask;
20195-
20196- // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
20197- // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
20198- // we halve the element size and continue the search.
20199- if (isShiftedMask_32(NewImm) || isShiftedMask_32(~(NewImm | ~Mask)))
20200- break;
20201-
20202- // We cannot shrink the element size any further if it is 2-bits.
20203- if (EltSize == 2)
20204- return false;
20205-
20206- EltSize /= 2;
20207- Mask >>= EltSize;
20208- unsigned Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
20209-
20210- // Return if there is mismatch in any of the demanded bits of Imm and Hi.
20211- if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
20178+ // Try to extend the immediate to a legal ARM rotating immediate
20179+ // by filling in non-demanded bits. ARM supports:
20180+ // - An 8-bit value rotated by an even number of bits (0, 2, 4, 6, ..., 30)
20181+ // - Any 8-bit immediate (Thumb2 also supports 16-bit splat patterns)
20182+ unsigned NonDemandedBits = ~DemandedBits;
20183+
20184+ // Try filling with 0
20185+ NewImm = Imm & DemandedBits;
20186+ if (isLegalLogicalImmediate(NewImm, Subtarget) ||
20187+ ((Op.getOpcode() == ISD::AND ||
20188+ (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
20189+ isLegalLogicalImmediate(~NewImm, Subtarget))) {
20190+ ++NumOptimizedImms;
20191+ } else {
20192+ // Try filling with 1
20193+ NewImm = Imm | NonDemandedBits;
20194+ if (isLegalLogicalImmediate(NewImm, Subtarget) ||
20195+ ((Op.getOpcode() == ISD::AND ||
20196+ (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
20197+ isLegalLogicalImmediate(~NewImm, Subtarget))) {
20198+ ++NumOptimizedImms;
20199+ } else {
2021220200 return false;
20213-
20214- // Merge the upper and lower halves of Imm and DemandedBits.
20215- Imm |= Hi;
20216- DemandedBits |= DemandedBitsHi;
20217- }
20218-
20219- ++NumOptimizedImms;
20220-
20221- // Replicate the element across the register width.
20222- while (EltSize < 32) {
20223- NewImm |= NewImm << EltSize;
20224- EltSize *= 2;
20201+ }
2022520202 }
2022620203
2022720204 (void)OldImm;
0 commit comments