Skip to content

Commit 2c933ed

Browse files
committed
[AArch64] Allow splitting bitmasks for EOR/ORR.
This patch extends #149095 for EOR and ORR. It uses a simple partition scheme to try to find two suitable disjoint bitmasks that can be used with EOR/ORR to reconstruct the original mask.
1 parent 97c5655 commit 2c933ed

File tree

2 files changed

+68
-25
lines changed

2 files changed

+68
-25
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
//
99
// This pass performs below peephole optimizations on MIR level.
1010
//
11-
// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
12-
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
11+
// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
12+
// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
1313
//
1414
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
1515
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
128128
// Strategy used to split logical immediate bitmasks.
129129
enum class SplitStrategy {
130130
// Split performed by splitBitmaskImm; this is the strategy passed for the
// AND/ANDS register-register opcodes.
Intersect,
131+
// Split performed by splitDisjointBitmaskImm, which produces two masks that
// share no set bits; this is the strategy passed for the EOR and ORR
// register-register opcodes.
Disjoint,
131132
};
132133
template <typename T>
133134
bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
@@ -190,19 +191,48 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
190191
return true;
191192
}
192193

194+
template <typename T>
195+
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
196+
T &Imm2Enc) {
197+
// Try to split a bitmask of the form 0b00000000011000000000011110000000 into
198+
// two disjoint masks such as 0b00000000011000000000000000000000 and
199+
// 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
200+
// new masks match the original mask.
201+
unsigned LowestBitSet = llvm::countr_zero(Imm);
202+
unsigned LowestGapBitUnset =
203+
LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
204+
205+
// Create a mask for the least significant group of consecutive ones.
206+
T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
207+
(static_cast<T>(1) << LowestBitSet);
208+
// Create a disjoint mask for the remaining ones.
209+
T NewImm2 = Imm & ~NewImm1;
210+
assert(((NewImm1 & NewImm2) == 0) && "Non-disjoint immediates!");
211+
212+
if (AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) {
213+
assert(((NewImm1 | NewImm2) == Imm) && "Invalid immediates!");
214+
assert(((NewImm1 ^ NewImm2) == Imm) && "Invalid immediates!");
215+
Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
216+
Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
217+
return true;
218+
}
219+
220+
return false;
221+
}
222+
193223
template <typename T>
194224
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
195225
SplitStrategy Strategy,
196226
unsigned OtherOpc) {
197-
// Try below transformation.
227+
// Try below transformations.
198228
//
199-
// MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
200-
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
229+
// MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
230+
// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
201231
//
202232
// The mov pseudo instruction could be expanded to multiple mov instructions
203233
// later. Let's try to split the constant operand of mov instruction into two
204-
// bitmask immediates. It makes only two AND instructions instead of multiple
205-
// mov + and instructions.
234+
// bitmask immediates based on the given split strategy. It makes only two
235+
// logical instructions instead of multiple mov + logic instructions.
206236

207237
return splitTwoPartImm<T>(
208238
MI,
@@ -224,6 +254,9 @@ bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
224254
case SplitStrategy::Intersect:
225255
SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
226256
break;
257+
case SplitStrategy::Disjoint:
258+
SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
259+
break;
227260
}
228261
if (SplitSucc)
229262
return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
@@ -889,6 +922,22 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
889922
Changed |= trySplitLogicalImm<uint64_t>(
890923
AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
891924
break;
925+
case AArch64::EORWrr:
926+
Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
927+
SplitStrategy::Disjoint);
928+
break;
929+
case AArch64::EORXrr:
930+
Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
931+
SplitStrategy::Disjoint);
932+
break;
933+
case AArch64::ORRWrr:
934+
Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
935+
SplitStrategy::Disjoint);
936+
break;
937+
case AArch64::ORRXrr:
938+
Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
939+
SplitStrategy::Disjoint);
940+
break;
892941
case AArch64::ORRWrs:
893942
Changed |= visitORR(MI);
894943
break;

llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,8 @@ entry:
375375
define i32 @test1_eor(i32 %a) {
376376
; CHECK-LABEL: test1_eor:
377377
; CHECK: // %bb.0: // %entry
378-
; CHECK-NEXT: mov w8, #1024 // =0x400
379-
; CHECK-NEXT: movk w8, #32, lsl #16
380-
; CHECK-NEXT: eor w0, w0, w8
378+
; CHECK-NEXT: eor w8, w0, #0x400
379+
; CHECK-NEXT: eor w0, w8, #0x200000
381380
; CHECK-NEXT: ret
382381
entry:
383382
%eor = xor i32 %a, 2098176
@@ -413,9 +412,8 @@ entry:
413412
define i64 @test4_eor(i64 %a) {
414413
; CHECK-LABEL: test4_eor:
415414
; CHECK: // %bb.0: // %entry
416-
; CHECK-NEXT: mov w8, #1024 // =0x400
417-
; CHECK-NEXT: movk w8, #32, lsl #16
418-
; CHECK-NEXT: eor x0, x0, x8
415+
; CHECK-NEXT: eor x8, x0, #0x400
416+
; CHECK-NEXT: eor x0, x8, #0x200000
419417
; CHECK-NEXT: ret
420418
entry:
421419
%eor = xor i64 %a, 2098176
@@ -425,9 +423,8 @@ entry:
425423
define i64 @test5_eor(i64 %a) {
426424
; CHECK-LABEL: test5_eor:
427425
; CHECK: // %bb.0: // %entry
428-
; CHECK-NEXT: mov x8, #16384 // =0x4000
429-
; CHECK-NEXT: movk x8, #2, lsl #32
430-
; CHECK-NEXT: eor x0, x0, x8
426+
; CHECK-NEXT: eor x8, x0, #0x4000
427+
; CHECK-NEXT: eor x0, x8, #0x200000000
431428
; CHECK-NEXT: ret
432429
entry:
433430
%eor = xor i64 %a, 8589950976
@@ -464,9 +461,8 @@ entry:
464461
define i32 @test1_orr(i32 %a) {
465462
; CHECK-LABEL: test1_orr:
466463
; CHECK: // %bb.0: // %entry
467-
; CHECK-NEXT: mov w8, #1024 // =0x400
468-
; CHECK-NEXT: movk w8, #32, lsl #16
469-
; CHECK-NEXT: orr w0, w0, w8
464+
; CHECK-NEXT: orr w8, w0, #0x400
465+
; CHECK-NEXT: orr w0, w8, #0x200000
470466
; CHECK-NEXT: ret
471467
entry:
472468
%orr = or i32 %a, 2098176
@@ -502,9 +498,8 @@ entry:
502498
define i64 @test4_orr(i64 %a) {
503499
; CHECK-LABEL: test4_orr:
504500
; CHECK: // %bb.0: // %entry
505-
; CHECK-NEXT: mov w8, #1024 // =0x400
506-
; CHECK-NEXT: movk w8, #32, lsl #16
507-
; CHECK-NEXT: orr x0, x0, x8
501+
; CHECK-NEXT: orr x8, x0, #0x400
502+
; CHECK-NEXT: orr x0, x8, #0x200000
508503
; CHECK-NEXT: ret
509504
entry:
510505
%orr = or i64 %a, 2098176
@@ -514,9 +509,8 @@ entry:
514509
define i64 @test5_orr(i64 %a) {
515510
; CHECK-LABEL: test5_orr:
516511
; CHECK: // %bb.0: // %entry
517-
; CHECK-NEXT: mov x8, #16384 // =0x4000
518-
; CHECK-NEXT: movk x8, #2, lsl #32
519-
; CHECK-NEXT: orr x0, x0, x8
512+
; CHECK-NEXT: orr x8, x0, #0x4000
513+
; CHECK-NEXT: orr x0, x8, #0x200000000
520514
; CHECK-NEXT: ret
521515
entry:
522516
%orr = or i64 %a, 8589950976

0 commit comments

Comments
 (0)