Skip to content

Commit 2543774

Browse files
committed
[AArch64] Allow splitting bitmasks for ANDS.
We already do this for AND; we can reuse the same infrastructure for ANDS so long as the second instruction of the pair is ANDS.
1 parent fcba958 commit 2543774

File tree

2 files changed

+23
-20
lines changed

2 files changed

+23
-20
lines changed

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
//
99
// This pass performs the following peephole optimizations at the MIR level.
1010
//
11-
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12-
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
11+
// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
12+
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
1313
//
1414
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
1515
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
126126
bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
127127

128128
template <typename T>
129-
bool visitAND(unsigned Opc, MachineInstr &MI);
129+
bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0);
130130
bool visitORR(MachineInstr &MI);
131131
bool visitCSEL(MachineInstr &MI);
132132
bool visitINSERT(MachineInstr &MI);
@@ -194,24 +194,24 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
194194
}
195195

196196
template <typename T>
197-
bool AArch64MIPeepholeOpt::visitAND(
198-
unsigned Opc, MachineInstr &MI) {
197+
bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI,
198+
unsigned OtherOpc) {
199199
// Try the transformation below.
200200
//
201-
// MOVi32imm + ANDWrr ==> ANDWri + ANDWri
202-
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
201+
// MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
202+
// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
203203
//
204204
// The mov pseudo instruction could be expanded to multiple mov instructions
205205
// later. Let's try to split the constant operand of mov instruction into two
206206
// bitmask immediates. It makes only two AND instructions instead of multiple
207-
// mov + and instructions.
207+
// mov + AND instructions.
208208

209209
return splitTwoPartImm<T>(
210210
MI,
211-
[Opc](T Imm, unsigned RegSize, T &Imm0,
212-
T &Imm1) -> std::optional<OpcodePair> {
211+
[Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
212+
T &Imm1) -> std::optional<OpcodePair> {
213213
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
214-
return std::make_pair(Opc, Opc);
214+
return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
215215
return std::nullopt;
216216
},
217217
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
@@ -864,6 +864,12 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
864864
case AArch64::ANDXrr:
865865
Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
866866
break;
867+
case AArch64::ANDSWrr:
868+
Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri);
869+
break;
870+
case AArch64::ANDSXrr:
871+
Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri);
872+
break;
867873
case AArch64::ORRWrs:
868874
Changed |= visitORR(MI);
869875
break;

llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -268,9 +268,8 @@ entry:
268268
define i32 @test1_ands(i32 %a) {
269269
; CHECK-LABEL: test1_ands:
270270
; CHECK: // %bb.0: // %entry
271-
; CHECK-NEXT: mov w8, #1024 // =0x400
272-
; CHECK-NEXT: movk w8, #32, lsl #16
273-
; CHECK-NEXT: ands w8, w0, w8
271+
; CHECK-NEXT: and w8, w0, #0x3ffc00
272+
; CHECK-NEXT: ands w8, w8, #0xffe007ff
274273
; CHECK-NEXT: csel w0, w8, wzr, eq
275274
; CHECK-NEXT: ret
276275
entry:
@@ -315,9 +314,8 @@ entry:
315314
define i64 @test4_ands(i64 %a) {
316315
; CHECK-LABEL: test4_ands:
317316
; CHECK: // %bb.0: // %entry
318-
; CHECK-NEXT: mov w8, #1024 // =0x400
319-
; CHECK-NEXT: movk w8, #32, lsl #16
320-
; CHECK-NEXT: ands x8, x0, x8
317+
; CHECK-NEXT: and x8, x0, #0x3ffc00
318+
; CHECK-NEXT: ands x8, x8, #0xffffffffffe007ff
321319
; CHECK-NEXT: csel x0, x8, xzr, eq
322320
; CHECK-NEXT: ret
323321
entry:
@@ -330,9 +328,8 @@ entry:
330328
define i64 @test5_ands(i64 %a) {
331329
; CHECK-LABEL: test5_ands:
332330
; CHECK: // %bb.0: // %entry
333-
; CHECK-NEXT: mov x8, #16384 // =0x4000
334-
; CHECK-NEXT: movk x8, #2, lsl #32
335-
; CHECK-NEXT: ands x8, x0, x8
331+
; CHECK-NEXT: and x8, x0, #0x3ffffc000
332+
; CHECK-NEXT: ands x8, x8, #0xfffffffe00007fff
336333
; CHECK-NEXT: csel x0, x8, xzr, eq
337334
; CHECK-NEXT: ret
338335
entry:

0 commit comments

Comments
 (0)