Skip to content

[AArch64] Allow splitting bitmasks for EOR/ORR. #150394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 57 additions & 7 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
//
// This pass performs the following peephole optimizations at the MIR level.
//
// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
Expand Down Expand Up @@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
// Strategy used to split logical immediate bitmasks. trySplitLogicalImm
// dispatches on this value to pick the helper that computes the two masks.
enum class SplitStrategy {
// Split via splitBitmaskImm; selected for the AND/ANDS register forms.
Intersect,
// Split via splitDisjointBitmaskImm into two masks with no common set bits
// whose inclusive/exclusive OR equals the original mask; selected for the
// EOR/ORR register forms.
Disjoint,
};
template <typename T>
bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
Expand Down Expand Up @@ -163,6 +164,7 @@ INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
T UImm = static_cast<T>(Imm);
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");

// The bitmask immediate consists of consecutive ones. Let's say there is
// constant 0b00000000001000000000010000000000 which does not consist of
Expand Down Expand Up @@ -190,19 +192,48 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
return true;
}

/// Try to express \p Imm as two masks that share no set bits, so that the
/// inclusive/exclusive OR of the two masks reproduces \p Imm. For example
/// 0b00000000011000000000011110000000 is separated into
/// 0b00000000000000000000011110000000 (the lowest run of ones) and
/// 0b00000000011000000000000000000000 (everything above it).
///
/// \param Imm      Mask to split; must be neither zero nor all ones.
/// \param RegSize  Register width forwarded to the logical-immediate helpers.
/// \param Imm1Enc  On success, the encoding of the lowest run of ones.
/// \param Imm2Enc  On success, the encoding of the remaining set bits.
/// \returns true when both outputs were written; false (outputs untouched)
///          when the remaining bits are not a valid logical immediate.
template <typename T>
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
                                    T &Imm2Enc) {
  assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");

  // Find the least significant run of consecutive ones. It occupies the bit
  // positions [RunBegin, RunEnd).
  const unsigned RunBegin = llvm::countr_zero(Imm);
  const unsigned RunEnd = RunBegin + llvm::countr_one(Imm >> RunBegin);

  // RunEnd is used as a shift amount below; shifting by the full width of T
  // is undefined, so the run must end below the top bit.
  assert(RunEnd < sizeof(T) * CHAR_BIT && "Undefined behaviour!");

  // Mask covering only the lowest run, and the mask of whatever is left.
  const T One = 1;
  const T LowRunMask = (One << RunEnd) - (One << RunBegin);
  const T RemainderMask = Imm & ~LowRunMask;

  // Only the remainder is checked; give up if it cannot be encoded.
  if (AArch64_AM::isLogicalImmediate(RemainderMask, RegSize)) {
    Imm1Enc = AArch64_AM::encodeLogicalImmediate(LowRunMask, RegSize);
    Imm2Enc = AArch64_AM::encodeLogicalImmediate(RemainderMask, RegSize);
    return true;
  }
  return false;
}

template <typename T>
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
SplitStrategy Strategy,
unsigned OtherOpc) {
// Try below transformation.
// Try below transformations.
//
// MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
// MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. Let's try to split the constant operand of mov instruction into two
// bitmask immediates. It makes only two AND instructions instead of multiple
// mov + and instructions.
// bitmask immediates based on the given split strategy. It makes only two
// logical instructions instead of multiple mov + logic instructions.

return splitTwoPartImm<T>(
MI,
Expand All @@ -224,6 +255,9 @@ bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
case SplitStrategy::Intersect:
SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
break;
case SplitStrategy::Disjoint:
SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
break;
}
if (SplitSucc)
return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
Expand Down Expand Up @@ -889,6 +923,22 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
Changed |= trySplitLogicalImm<uint64_t>(
AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
break;
case AArch64::EORWrr:
Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
SplitStrategy::Disjoint);
break;
case AArch64::EORXrr:
Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
SplitStrategy::Disjoint);
break;
case AArch64::ORRWrr:
Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
SplitStrategy::Disjoint);
break;
case AArch64::ORRXrr:
Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
SplitStrategy::Disjoint);
break;
case AArch64::ORRWrs:
Changed |= visitORR(MI);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,3 +370,175 @@ entry:
%r = select i1 %c, i64 %a, i64 %ands
ret i64 %r
}

; Test EOR.
; 2098176 is 0x200400, which splits into the disjoint bitmask immediates 0x400
; and 0x200000, so the xor is emitted as two eor-immediate instructions.
define i32 @test1_eor(i32 %a) {
; CHECK-LABEL: test1_eor:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor w8, w0, #0x400
; CHECK-NEXT: eor w0, w8, #0x200000
; CHECK-NEXT: ret
entry:
%eor = xor i32 %a, 2098176
ret i32 %eor
}

; This constant (135 = 0x87) should not be split because it can be
; materialized by a single mov, so mov + eor is already two instructions.
define i32 @test2_eor(i32 %a) {
; CHECK-LABEL: test2_eor:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #135 // =0x87
; CHECK-NEXT: eor w0, w0, w8
; CHECK-NEXT: ret
entry:
%eor = xor i32 %a, 135
ret i32 %eor
}

; This constant (2163712 = 0x210400) should not be split: once the lowest run
; of ones (0x400) is removed, the remainder (0x210000) is not a valid bitmask
; immediate.
define i32 @test3_eor(i32 %a) {
; CHECK-LABEL: test3_eor:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: movk w8, #33, lsl #16
; CHECK-NEXT: eor w0, w0, w8
; CHECK-NEXT: ret
entry:
%eor = xor i32 %a, 2163712
ret i32 %eor
}

; 64-bit variant: 2098176 is 0x200400, which splits into the disjoint bitmask
; immediates 0x400 and 0x200000.
define i64 @test4_eor(i64 %a) {
; CHECK-LABEL: test4_eor:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor x8, x0, #0x400
; CHECK-NEXT: eor x0, x8, #0x200000
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 2098176
ret i64 %eor
}

; The constant does not fit in 32 bits: 8589950976 is 0x200004000, which splits
; into the disjoint bitmask immediates 0x4000 and 0x200000000.
define i64 @test5_eor(i64 %a) {
; CHECK-LABEL: test5_eor:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor x8, x0, #0x4000
; CHECK-NEXT: eor x0, x8, #0x200000000
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 8589950976
ret i64 %eor
}

; This constant (135 = 0x87) should not be split because it can be
; materialized by a single mov, so mov + eor is already two instructions.
define i64 @test6_eor(i64 %a) {
; CHECK-LABEL: test6_eor:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #135 // =0x87
; CHECK-NEXT: eor x0, x0, x8
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 135
ret i64 %eor
}

; This constant (2163712 = 0x210400) should not be split: once the lowest run
; of ones (0x400) is removed, the remainder (0x210000) is not a valid bitmask
; immediate.
define i64 @test7_eor(i64 %a) {
; CHECK-LABEL: test7_eor:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: movk w8, #33, lsl #16
; CHECK-NEXT: eor x0, x0, x8
; CHECK-NEXT: ret
entry:
%eor = xor i64 %a, 2163712
ret i64 %eor
}

; Test ORR.
; 2098176 is 0x200400, which splits into the disjoint bitmask immediates 0x400
; and 0x200000, so the or is emitted as two orr-immediate instructions.
define i32 @test1_orr(i32 %a) {
; CHECK-LABEL: test1_orr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr w8, w0, #0x400
; CHECK-NEXT: orr w0, w8, #0x200000
; CHECK-NEXT: ret
entry:
%orr = or i32 %a, 2098176
ret i32 %orr
}

; This constant (135 = 0x87) should not be split because it can be
; materialized by a single mov, so mov + orr is already two instructions.
define i32 @test2_orr(i32 %a) {
; CHECK-LABEL: test2_orr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #135 // =0x87
; CHECK-NEXT: orr w0, w0, w8
; CHECK-NEXT: ret
entry:
%orr = or i32 %a, 135
ret i32 %orr
}

; This constant (2163712 = 0x210400) should not be split: once the lowest run
; of ones (0x400) is removed, the remainder (0x210000) is not a valid bitmask
; immediate.
define i32 @test3_orr(i32 %a) {
; CHECK-LABEL: test3_orr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: movk w8, #33, lsl #16
; CHECK-NEXT: orr w0, w0, w8
; CHECK-NEXT: ret
entry:
%orr = or i32 %a, 2163712
ret i32 %orr
}

; 64-bit variant: 2098176 is 0x200400, which splits into the disjoint bitmask
; immediates 0x400 and 0x200000.
define i64 @test4_orr(i64 %a) {
; CHECK-LABEL: test4_orr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr x8, x0, #0x400
; CHECK-NEXT: orr x0, x8, #0x200000
; CHECK-NEXT: ret
entry:
%orr = or i64 %a, 2098176
ret i64 %orr
}

; The constant does not fit in 32 bits: 8589950976 is 0x200004000, which splits
; into the disjoint bitmask immediates 0x4000 and 0x200000000.
define i64 @test5_orr(i64 %a) {
; CHECK-LABEL: test5_orr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr x8, x0, #0x4000
; CHECK-NEXT: orr x0, x8, #0x200000000
; CHECK-NEXT: ret
entry:
%orr = or i64 %a, 8589950976
ret i64 %orr
}

; This constant (135 = 0x87) should not be split because it can be
; materialized by a single mov, so mov + orr is already two instructions.
define i64 @test6_orr(i64 %a) {
; CHECK-LABEL: test6_orr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #135 // =0x87
; CHECK-NEXT: orr x0, x0, x8
; CHECK-NEXT: ret
entry:
%orr = or i64 %a, 135
ret i64 %orr
}

; This constant (2163712 = 0x210400) should not be split: once the lowest run
; of ones (0x400) is removed, the remainder (0x210000) is not a valid bitmask
; immediate.
define i64 @test7_orr(i64 %a) {
; CHECK-LABEL: test7_orr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: movk w8, #33, lsl #16
; CHECK-NEXT: orr x0, x0, x8
; CHECK-NEXT: ret
entry:
%orr = or i64 %a, 2163712
ret i64 %orr
}