diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index b97d6229b1d01..fd4ef2aa28f8a 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -8,8 +8,8 @@ // // This pass performs below peephole optimizations on MIR level. // -// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri -// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri +// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri +// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri // // 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi // MOVi64imm + ADDXrr ==> ADDXri + ADDXri @@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { // Strategy used to split logical immediate bitmasks. enum class SplitStrategy { Intersect, + Disjoint, }; template bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI, @@ -163,6 +164,7 @@ INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", template static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { T UImm = static_cast(Imm); + assert(UImm && (UImm != ~static_cast(0)) && "Invalid immediate!"); // The bitmask immediate consists of consecutive ones. Let's say there is // constant 0b00000000001000000000010000000000 which does not consist of @@ -190,19 +192,48 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { return true; } +template +static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, + T &Imm2Enc) { + assert(Imm && (Imm != ~static_cast(0)) && "Invalid immediate!"); + + // Try to split a bitmask of the form 0b00000000011000000000011110000000 into + // two disjoint masks such as 0b00000000011000000000000000000000 and + // 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the + // new masks match the original mask. 
+ unsigned LowestBitSet = llvm::countr_zero(Imm); + unsigned LowestGapBitUnset = + LowestBitSet + llvm::countr_one(Imm >> LowestBitSet); + + // Create a mask for the least significant group of consecutive ones. + assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!"); + T NewImm1 = (static_cast(1) << LowestGapBitUnset) - + (static_cast(1) << LowestBitSet); + // Create a disjoint mask for the remaining ones. + T NewImm2 = Imm & ~NewImm1; + + // Do not split if NewImm2 is not a valid bitmask immediate. + if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) + return false; + + Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); + Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); + return true; +} + template bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI, SplitStrategy Strategy, unsigned OtherOpc) { - // Try below transformation. + // Try below transformations. // - // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri - // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri + // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri + // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri // // The mov pseudo instruction could be expanded to multiple mov instructions // later. Let's try to split the constant operand of mov instruction into two - // bitmask immediates. It makes only two AND instructions instead of multiple - // mov + and instructions. + // bitmask immediates based on the given split strategy. It makes only two + // logical instructions instead of multiple mov + logic instructions. return splitTwoPartImm( MI, @@ -224,6 +255,9 @@ bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI, case SplitStrategy::Intersect: SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1); break; + case SplitStrategy::Disjoint: + SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1); + break; } if (SplitSucc) return std::make_pair(Opc, !OtherOpc ? 
; Test EOR.
; The checks expect the xor constant to be applied as two eor immediates
; instead of being materialized in a register first.
define i32 @test1_eor(i32 %a) {
; CHECK-LABEL: test1_eor:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor w8, w0, #0x400
; CHECK-NEXT:    eor w0, w8, #0x200000
; CHECK-NEXT:    ret
entry:
  ; 2098176 = 0x200400 = 0x400 | 0x200000, the two immediates checked above.
  %eor = xor i32 %a, 2098176
  ret i32 %eor
}

; This constant should not be split because it can be handled by one mov.
define i32 @test2_eor(i32 %a) {
; CHECK-LABEL: test2_eor:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #135 // =0x87
; CHECK-NEXT:    eor w0, w0, w8
; CHECK-NEXT:    ret
entry:
  ; 135 = 0x87; the checks expect a single mov followed by a register eor.
  %eor = xor i32 %a, 135
  ret i32 %eor
}

; This constant should not be split because the split immediate is not a valid
; bitmask immediate.
define i32 @test3_eor(i32 %a) {
; CHECK-LABEL: test3_eor:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #1024 // =0x400
; CHECK-NEXT:    movk w8, #33, lsl #16
; CHECK-NEXT:    eor w0, w0, w8
; CHECK-NEXT:    ret
entry:
  ; 2163712 = 0x210400 = 1024 + (33 << 16); the checks expect it to stay as
  ; mov + movk feeding a register eor, i.e. no split.
  %eor = xor i32 %a, 2163712
  ret i32 %eor
}

; 64-bit form of the splittable case: the checks expect two eor immediates.
define i64 @test4_eor(i64 %a) {
; CHECK-LABEL: test4_eor:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor x8, x0, #0x400
; CHECK-NEXT:    eor x0, x8, #0x200000
; CHECK-NEXT:    ret
entry:
  ; 2098176 = 0x200400 = 0x400 | 0x200000.
  %eor = xor i64 %a, 2098176
  ret i64 %eor
}

; Splittable constant whose upper part lies above bit 31: the checks expect
; two eor immediates.
define i64 @test5_eor(i64 %a) {
; CHECK-LABEL: test5_eor:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    eor x8, x0, #0x4000
; CHECK-NEXT:    eor x0, x8, #0x200000000
; CHECK-NEXT:    ret
entry:
  ; 8589950976 = 0x200004000 = 0x4000 | 0x200000000.
  %eor = xor i64 %a, 8589950976
  ret i64 %eor
}

; This constant should not be split because it can be handled by one mov.
define i64 @test6_eor(i64 %a) {
; CHECK-LABEL: test6_eor:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #135 // =0x87
; CHECK-NEXT:    eor x0, x0, x8
; CHECK-NEXT:    ret
entry:
  ; 135 = 0x87; the checks expect a single mov followed by a register eor.
  %eor = xor i64 %a, 135
  ret i64 %eor
}

; This constant should not be split because the split immediate is not a valid
; bitmask immediate.
define i64 @test7_eor(i64 %a) {
; CHECK-LABEL: test7_eor:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #1024 // =0x400
; CHECK-NEXT:    movk w8, #33, lsl #16
; CHECK-NEXT:    eor x0, x0, x8
; CHECK-NEXT:    ret
entry:
  ; 2163712 = 0x210400 = 1024 + (33 << 16); expected to stay as mov + movk.
  %eor = xor i64 %a, 2163712
  ret i64 %eor
}

; Test ORR.
; The checks expect the or constant to be applied as two orr immediates
; instead of being materialized in a register first.
define i32 @test1_orr(i32 %a) {
; CHECK-LABEL: test1_orr:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    orr w8, w0, #0x400
; CHECK-NEXT:    orr w0, w8, #0x200000
; CHECK-NEXT:    ret
entry:
  ; 2098176 = 0x200400 = 0x400 | 0x200000, the two immediates checked above.
  %orr = or i32 %a, 2098176
  ret i32 %orr
}

; This constant should not be split because it can be handled by one mov.
define i32 @test2_orr(i32 %a) {
; CHECK-LABEL: test2_orr:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #135 // =0x87
; CHECK-NEXT:    orr w0, w0, w8
; CHECK-NEXT:    ret
entry:
  ; 135 = 0x87; the checks expect a single mov followed by a register orr.
  %orr = or i32 %a, 135
  ret i32 %orr
}

; This constant should not be split because the split immediate is not a valid
; bitmask immediate.
define i32 @test3_orr(i32 %a) {
; CHECK-LABEL: test3_orr:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #1024 // =0x400
; CHECK-NEXT:    movk w8, #33, lsl #16
; CHECK-NEXT:    orr w0, w0, w8
; CHECK-NEXT:    ret
entry:
  ; 2163712 = 0x210400 = 1024 + (33 << 16); the checks expect it to stay as
  ; mov + movk feeding a register orr, i.e. no split.
  %orr = or i32 %a, 2163712
  ret i32 %orr
}

; 64-bit form of the splittable case: the checks expect two orr immediates.
define i64 @test4_orr(i64 %a) {
; CHECK-LABEL: test4_orr:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    orr x8, x0, #0x400
; CHECK-NEXT:    orr x0, x8, #0x200000
; CHECK-NEXT:    ret
entry:
  ; 2098176 = 0x200400 = 0x400 | 0x200000.
  %orr = or i64 %a, 2098176
  ret i64 %orr
}

; Splittable constant whose upper part lies above bit 31: the checks expect
; two orr immediates.
define i64 @test5_orr(i64 %a) {
; CHECK-LABEL: test5_orr:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    orr x8, x0, #0x4000
; CHECK-NEXT:    orr x0, x8, #0x200000000
; CHECK-NEXT:    ret
entry:
  ; 8589950976 = 0x200004000 = 0x4000 | 0x200000000.
  %orr = or i64 %a, 8589950976
  ret i64 %orr
}

; This constant should not be split because it can be handled by one mov.
define i64 @test6_orr(i64 %a) {
; CHECK-LABEL: test6_orr:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #135 // =0x87
; CHECK-NEXT:    orr x0, x0, x8
; CHECK-NEXT:    ret
entry:
  ; 135 = 0x87; the checks expect a single mov followed by a register orr.
  %orr = or i64 %a, 135
  ret i64 %orr
}

; This constant should not be split because the split immediate is not a valid
; bitmask immediate.
define i64 @test7_orr(i64 %a) {
; CHECK-LABEL: test7_orr:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #1024 // =0x400
; CHECK-NEXT:    movk w8, #33, lsl #16
; CHECK-NEXT:    orr x0, x0, x8
; CHECK-NEXT:    ret
entry:
  ; 2163712 = 0x210400 = 1024 + (33 << 16); expected to stay as mov + movk.
  %orr = or i64 %a, 2163712
  ret i64 %orr
}