Skip to content

Commit 565f707

Browse files
authored
[AArch64] Allow splitting bitmasks for EOR/ORR. (#150394)
This patch extends #149095 for EOR and ORR. It uses a simple partition scheme to try to find two suitable disjoint bitmasks that can be used with EOR/ORR to reconstruct the original mask. Fixes: #148987.
1 parent aeeb9b5 commit 565f707

File tree

2 files changed

+229
-7
lines changed

2 files changed

+229
-7
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
//
99
// This pass performs the peephole optimizations below at the MIR level.
1010
//
11-
// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
12-
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
11+
// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
12+
// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
1313
//
1414
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
1515
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
128128
// Strategy used to split logical immediate bitmasks.
129129
enum class SplitStrategy {
130130
Intersect,
131+
Disjoint,
131132
};
132133
template <typename T>
133134
bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
@@ -163,6 +164,7 @@ INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
163164
template <typename T>
164165
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
165166
T UImm = static_cast<T>(Imm);
167+
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");
166168

167169
// The bitmask immediate consists of consecutive ones. Let's say there is
168170
// constant 0b00000000001000000000010000000000 which does not consist of
@@ -190,19 +192,48 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
190192
return true;
191193
}
192194

195+
template <typename T>
196+
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
197+
T &Imm2Enc) {
198+
assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");
199+
200+
// Try to split a bitmask of the form 0b00000000011000000000011110000000 into
201+
// two disjoint masks such as 0b00000000011000000000000000000000 and
202+
// 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
203+
// new masks match the original mask.
204+
unsigned LowestBitSet = llvm::countr_zero(Imm);
205+
unsigned LowestGapBitUnset =
206+
LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
207+
208+
// Create a mask for the least significant group of consecutive ones.
209+
assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
210+
T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
211+
(static_cast<T>(1) << LowestBitSet);
212+
// Create a disjoint mask for the remaining ones.
213+
T NewImm2 = Imm & ~NewImm1;
214+
215+
// Do not split if NewImm2 is not a valid bitmask immediate.
216+
if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
217+
return false;
218+
219+
Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
220+
Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
221+
return true;
222+
}
223+
193224
template <typename T>
194225
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
195226
SplitStrategy Strategy,
196227
unsigned OtherOpc) {
197-
// Try below transformation.
228+
// Try below transformations.
198229
//
199-
// MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
200-
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
230+
// MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
231+
// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
201232
//
202233
// The mov pseudo instruction could be expanded to multiple mov instructions
203234
// later. Let's try to split the constant operand of mov instruction into two
204-
// bitmask immediates. It makes only two AND instructions instead of multiple
205-
// mov + and instructions.
235+
// bitmask immediates based on the given split strategy. It makes only two
236+
// logical instructions instead of multiple mov + logic instructions.
206237

207238
return splitTwoPartImm<T>(
208239
MI,
@@ -224,6 +255,9 @@ bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
224255
case SplitStrategy::Intersect:
225256
SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
226257
break;
258+
case SplitStrategy::Disjoint:
259+
SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
260+
break;
227261
}
228262
if (SplitSucc)
229263
return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
@@ -889,6 +923,22 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
889923
Changed |= trySplitLogicalImm<uint64_t>(
890924
AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
891925
break;
926+
case AArch64::EORWrr:
927+
Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
928+
SplitStrategy::Disjoint);
929+
break;
930+
case AArch64::EORXrr:
931+
Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
932+
SplitStrategy::Disjoint);
933+
break;
934+
case AArch64::ORRWrr:
935+
Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
936+
SplitStrategy::Disjoint);
937+
break;
938+
case AArch64::ORRXrr:
939+
Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
940+
SplitStrategy::Disjoint);
941+
break;
892942
case AArch64::ORRWrs:
893943
Changed |= visitORR(MI);
894944
break;

llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll renamed to llvm/test/CodeGen/AArch64/aarch64-split-logic-bitmask-immediate.ll

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,3 +370,175 @@ entry:
370370
%r = select i1 %c, i64 %a, i64 %ands
371371
ret i64 %r
372372
}
373+
374+
; Test EOR.
375+
; 2098176 (0x200400) is expected to be applied as two eor immediates,
; #0x400 and #0x200000, with no mov materializing the constant.
define i32 @test1_eor(i32 %a) {
376+
; CHECK-LABEL: test1_eor:
377+
; CHECK: // %bb.0: // %entry
378+
; CHECK-NEXT: eor w8, w0, #0x400
379+
; CHECK-NEXT: eor w0, w8, #0x200000
380+
; CHECK-NEXT: ret
381+
entry:
382+
%eor = xor i32 %a, 2098176
383+
ret i32 %eor
384+
}
385+
386+
; This constant should not be split because it can be handled by one mov.
387+
; 135 (0x87) is expected to stay a single mov feeding a register-register eor.
define i32 @test2_eor(i32 %a) {
388+
; CHECK-LABEL: test2_eor:
389+
; CHECK: // %bb.0: // %entry
390+
; CHECK-NEXT: mov w8, #135 // =0x87
391+
; CHECK-NEXT: eor w0, w0, w8
392+
; CHECK-NEXT: ret
393+
entry:
394+
%eor = xor i32 %a, 135
395+
ret i32 %eor
396+
}
397+
398+
; This constant should not be split because the split immediate is not a valid
399+
; bitmask immediate.
400+
; 2163712 (0x210400) is expected to stay as mov + movk feeding a
; register-register eor; no pair of eor immediates is emitted.
define i32 @test3_eor(i32 %a) {
401+
; CHECK-LABEL: test3_eor:
402+
; CHECK: // %bb.0: // %entry
403+
; CHECK-NEXT: mov w8, #1024 // =0x400
404+
; CHECK-NEXT: movk w8, #33, lsl #16
405+
; CHECK-NEXT: eor w0, w0, w8
406+
; CHECK-NEXT: ret
407+
entry:
408+
%eor = xor i32 %a, 2163712
409+
ret i32 %eor
410+
}
411+
412+
; 64-bit variant: 2098176 (0x200400) is expected to be applied as two eor
; immediates, #0x400 and #0x200000, on x registers.
define i64 @test4_eor(i64 %a) {
413+
; CHECK-LABEL: test4_eor:
414+
; CHECK: // %bb.0: // %entry
415+
; CHECK-NEXT: eor x8, x0, #0x400
416+
; CHECK-NEXT: eor x0, x8, #0x200000
417+
; CHECK-NEXT: ret
418+
entry:
419+
%eor = xor i64 %a, 2098176
420+
ret i64 %eor
421+
}
422+
423+
; 8589950976 (0x200004000) has a set bit above bit 31; it is expected to be
; applied as two eor immediates, #0x4000 and #0x200000000.
define i64 @test5_eor(i64 %a) {
424+
; CHECK-LABEL: test5_eor:
425+
; CHECK: // %bb.0: // %entry
426+
; CHECK-NEXT: eor x8, x0, #0x4000
427+
; CHECK-NEXT: eor x0, x8, #0x200000000
428+
; CHECK-NEXT: ret
429+
entry:
430+
%eor = xor i64 %a, 8589950976
431+
ret i64 %eor
432+
}
433+
434+
; This constant should not be split because it can be handled by one mov.
435+
; 64-bit variant: 135 (0x87) is expected to stay a single mov feeding a
; register-register eor.
define i64 @test6_eor(i64 %a) {
436+
; CHECK-LABEL: test6_eor:
437+
; CHECK: // %bb.0: // %entry
438+
; CHECK-NEXT: mov w8, #135 // =0x87
439+
; CHECK-NEXT: eor x0, x0, x8
440+
; CHECK-NEXT: ret
441+
entry:
442+
%eor = xor i64 %a, 135
443+
ret i64 %eor
444+
}
445+
446+
; This constant should not be split because the split immediate is not a valid
447+
; bitmask immediate.
448+
; 64-bit variant: 2163712 (0x210400) is expected to stay as mov + movk feeding
; a register-register eor; no pair of eor immediates is emitted.
define i64 @test7_eor(i64 %a) {
449+
; CHECK-LABEL: test7_eor:
450+
; CHECK: // %bb.0: // %entry
451+
; CHECK-NEXT: mov w8, #1024 // =0x400
452+
; CHECK-NEXT: movk w8, #33, lsl #16
453+
; CHECK-NEXT: eor x0, x0, x8
454+
; CHECK-NEXT: ret
455+
entry:
456+
%eor = xor i64 %a, 2163712
457+
ret i64 %eor
458+
}
459+
460+
; Test ORR.
461+
; 2098176 (0x200400) is expected to be applied as two orr immediates,
; #0x400 and #0x200000, with no mov materializing the constant.
define i32 @test1_orr(i32 %a) {
462+
; CHECK-LABEL: test1_orr:
463+
; CHECK: // %bb.0: // %entry
464+
; CHECK-NEXT: orr w8, w0, #0x400
465+
; CHECK-NEXT: orr w0, w8, #0x200000
466+
; CHECK-NEXT: ret
467+
entry:
468+
%orr = or i32 %a, 2098176
469+
ret i32 %orr
470+
}
471+
472+
; This constant should not be split because it can be handled by one mov.
473+
; 135 (0x87) is expected to stay a single mov feeding a register-register orr.
define i32 @test2_orr(i32 %a) {
474+
; CHECK-LABEL: test2_orr:
475+
; CHECK: // %bb.0: // %entry
476+
; CHECK-NEXT: mov w8, #135 // =0x87
477+
; CHECK-NEXT: orr w0, w0, w8
478+
; CHECK-NEXT: ret
479+
entry:
480+
%orr = or i32 %a, 135
481+
ret i32 %orr
482+
}
483+
484+
; This constant should not be split because the split immediate is not a valid
485+
; bitmask immediate.
486+
; 2163712 (0x210400) is expected to stay as mov + movk feeding a
; register-register orr; no pair of orr immediates is emitted.
define i32 @test3_orr(i32 %a) {
487+
; CHECK-LABEL: test3_orr:
488+
; CHECK: // %bb.0: // %entry
489+
; CHECK-NEXT: mov w8, #1024 // =0x400
490+
; CHECK-NEXT: movk w8, #33, lsl #16
491+
; CHECK-NEXT: orr w0, w0, w8
492+
; CHECK-NEXT: ret
493+
entry:
494+
%orr = or i32 %a, 2163712
495+
ret i32 %orr
496+
}
497+
498+
; 64-bit variant: 2098176 (0x200400) is expected to be applied as two orr
; immediates, #0x400 and #0x200000, on x registers.
define i64 @test4_orr(i64 %a) {
499+
; CHECK-LABEL: test4_orr:
500+
; CHECK: // %bb.0: // %entry
501+
; CHECK-NEXT: orr x8, x0, #0x400
502+
; CHECK-NEXT: orr x0, x8, #0x200000
503+
; CHECK-NEXT: ret
504+
entry:
505+
%orr = or i64 %a, 2098176
506+
ret i64 %orr
507+
}
508+
509+
; 8589950976 (0x200004000) has a set bit above bit 31; it is expected to be
; applied as two orr immediates, #0x4000 and #0x200000000.
define i64 @test5_orr(i64 %a) {
510+
; CHECK-LABEL: test5_orr:
511+
; CHECK: // %bb.0: // %entry
512+
; CHECK-NEXT: orr x8, x0, #0x4000
513+
; CHECK-NEXT: orr x0, x8, #0x200000000
514+
; CHECK-NEXT: ret
515+
entry:
516+
%orr = or i64 %a, 8589950976
517+
ret i64 %orr
518+
}
519+
520+
; This constant should not be split because it can be handled by one mov.
521+
; 64-bit variant: 135 (0x87) is expected to stay a single mov feeding a
; register-register orr.
define i64 @test6_orr(i64 %a) {
522+
; CHECK-LABEL: test6_orr:
523+
; CHECK: // %bb.0: // %entry
524+
; CHECK-NEXT: mov w8, #135 // =0x87
525+
; CHECK-NEXT: orr x0, x0, x8
526+
; CHECK-NEXT: ret
527+
entry:
528+
%orr = or i64 %a, 135
529+
ret i64 %orr
530+
}
531+
532+
; This constant should not be split because the split immediate is not a valid
533+
; bitmask immediate.
534+
; 64-bit variant: 2163712 (0x210400) is expected to stay as mov + movk feeding
; a register-register orr; no pair of orr immediates is emitted.
define i64 @test7_orr(i64 %a) {
535+
; CHECK-LABEL: test7_orr:
536+
; CHECK: // %bb.0: // %entry
537+
; CHECK-NEXT: mov w8, #1024 // =0x400
538+
; CHECK-NEXT: movk w8, #33, lsl #16
539+
; CHECK-NEXT: orr x0, x0, x8
540+
; CHECK-NEXT: ret
541+
entry:
542+
%orr = or i64 %a, 2163712
543+
ret i64 %orr
544+
}

0 commit comments

Comments
 (0)