Skip to content

Commit 8c1aff4

Browse files
committed
[AArch64] Allow peephole to optimize AND + signed compare with 0
This should be the peephole's job. Because ANDS always sets the V flag to 0, signed comparisons with 0 can safely be replaced with tst. Note that this applies only to AArch64, because ANDS on ARM leaves the V flag unchanged. Fixes: #154387
1 parent 2b48dfd commit 8c1aff4

File tree

3 files changed

+60
-27
lines changed

3 files changed

+60
-27
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1780,6 +1780,16 @@ static unsigned sForm(MachineInstr &Instr) {
17801780
case AArch64::SUBSWri:
17811781
case AArch64::SUBSXrr:
17821782
case AArch64::SUBSXri:
1783+
case AArch64::ANDSWri:
1784+
case AArch64::ANDSWrr:
1785+
case AArch64::ANDSWrs:
1786+
case AArch64::ANDSXri:
1787+
case AArch64::ANDSXrr:
1788+
case AArch64::ANDSXrs:
1789+
case AArch64::BICSWrr:
1790+
case AArch64::BICSXrr:
1791+
case AArch64::BICSWrs:
1792+
case AArch64::BICSXrs:
17831793
return Instr.getOpcode();
17841794

17851795
case AArch64::ADDWrr:
@@ -1810,6 +1820,22 @@ static unsigned sForm(MachineInstr &Instr) {
18101820
return AArch64::ANDSWri;
18111821
case AArch64::ANDXri:
18121822
return AArch64::ANDSXri;
1823+
case AArch64::ANDWrr:
1824+
return AArch64::ANDSWrr;
1825+
case AArch64::ANDWrs:
1826+
return AArch64::ANDSWrs;
1827+
case AArch64::ANDXrr:
1828+
return AArch64::ANDSXrr;
1829+
case AArch64::ANDXrs:
1830+
return AArch64::ANDSXrs;
1831+
case AArch64::BICWrr:
1832+
return AArch64::BICSWrr;
1833+
case AArch64::BICXrr:
1834+
return AArch64::BICSXrr;
1835+
case AArch64::BICWrs:
1836+
return AArch64::BICSWrs;
1837+
case AArch64::BICXrs:
1838+
return AArch64::BICSXrs;
18131839
}
18141840
}
18151841

@@ -1947,6 +1973,25 @@ static bool isSUBSRegImm(unsigned Opcode) {
19471973
return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
19481974
}
19491975

1976+
/// Return true if \p MI is a bitwise AND or BIC instruction, in either its
/// plain or its flag-setting form.
///
/// The opcode is first normalized through sForm(), which maps the non-flag-
/// setting AND/BIC opcodes to their ANDS/BICS counterparts and returns the
/// ANDS/BICS opcodes unchanged, so only the S-form opcodes are listed below.
/// The caller uses this to allow a compare to be substituted even when the V
/// flag is read, since ANDS and BICS always clear V.
static bool isANDOpcode(MachineInstr &MI) {
1977+
unsigned Opc = sForm(MI);
1978+
switch (Opc) {
1979+
case AArch64::ANDSWri:
1980+
case AArch64::ANDSWrr:
1981+
case AArch64::ANDSWrs:
1982+
case AArch64::ANDSXri:
1983+
case AArch64::ANDSXrr:
1984+
case AArch64::ANDSXrs:
1985+
case AArch64::BICSWrr:
1986+
case AArch64::BICSXrr:
1987+
case AArch64::BICSWrs:
1988+
case AArch64::BICSXrs:
1989+
return true;
1990+
default:
// Any other opcode (including whatever sForm() yields for instructions
// that have no flag-setting form) is not an AND/BIC.
1991+
return false;
1992+
}
1993+
}
1994+
19501995
/// Check if CmpInstr can be substituted by MI.
19511996
///
19521997
/// CmpInstr can be substituted:
@@ -1984,7 +2029,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
19842029
// 1) MI and CmpInstr set N and V to the same value.
19852030
// 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
19862031
// signed overflow occurs, so CmpInstr could still be simplified away.
1987-
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
2032+
// Note that Ands and Bics instructions always clear the V flag.
2033+
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
19882034
return false;
19892035

19902036
AccessKind AccessToCheck = AK_Write;

llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ body: |
2626
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
2727
; CHECK-NEXT: {{ $}}
2828
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
29-
; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri killed [[ANDWri]], 15
30-
; CHECK-NEXT: $wzr = SUBSWri killed [[ANDWri]], 0, 0, implicit-def $nzcv
29+
; CHECK-NEXT: [[ANDSWri:%[0-9]+]]:gpr32common = ANDSWri killed [[ANDSWri]], 15, implicit-def $nzcv
3130
; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv
3231
; CHECK-NEXT: {{ $}}
3332
; CHECK-NEXT: bb.1.if.then:

llvm/test/CodeGen/AArch64/icmp-ult-eq-fold.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,7 @@ define i1 @test_disjoint(i1 %0, i32 %1, i32 %2) {
167167
; CHECK-NEXT: mov w8, #1 // =0x1
168168
; CHECK-NEXT: orr w9, w2, #0x800000
169169
; CHECK-NEXT: lsl w8, w8, w1
170-
; CHECK-NEXT: and w8, w9, w8
171-
; CHECK-NEXT: cmp w8, #0
170+
; CHECK-NEXT: tst w9, w8
172171
; CHECK-NEXT: cset w8, eq
173172
; CHECK-NEXT: orr w8, w0, w8
174173
; CHECK-NEXT: and w0, w8, #0x1
@@ -188,8 +187,7 @@ define i1 @test_disjoint2(i1 %0, i32 %1, i32 %2) {
188187
; CHECK-NEXT: mov w8, #1 // =0x1
189188
; CHECK-NEXT: orr w9, w2, #0x800000
190189
; CHECK-NEXT: lsl w8, w8, w1
191-
; CHECK-NEXT: and w8, w9, w8
192-
; CHECK-NEXT: cmp w8, #0
190+
; CHECK-NEXT: tst w9, w8
193191
; CHECK-NEXT: cset w8, gt
194192
; CHECK-NEXT: orr w8, w0, w8
195193
; CHECK-NEXT: and w0, w8, #0x1
@@ -209,8 +207,7 @@ define i1 @test_disjoint3(i1 %0, i32 %1, i32 %2) {
209207
; CHECK-NEXT: mov w8, #1 // =0x1
210208
; CHECK-NEXT: orr w9, w2, #0x800000
211209
; CHECK-NEXT: lsl w8, w8, w1
212-
; CHECK-NEXT: and w8, w9, w8
213-
; CHECK-NEXT: cmp w8, #0
210+
; CHECK-NEXT: tst w9, w8
214211
; CHECK-NEXT: cset w8, mi
215212
; CHECK-NEXT: orr w8, w0, w8
216213
; CHECK-NEXT: and w0, w8, #0x1
@@ -273,8 +270,7 @@ define i1 @test_disjoint_inverse(i1 %0, i32 %1, i32 %2) {
273270
; CHECK-NEXT: mov w8, #1 // =0x1
274271
; CHECK-NEXT: orr w9, w2, #0x800000
275272
; CHECK-NEXT: lsl w8, w8, w1
276-
; CHECK-NEXT: bic w8, w9, w8
277-
; CHECK-NEXT: cmp w8, #0
273+
; CHECK-NEXT: bics wzr, w9, w8
278274
; CHECK-NEXT: cset w8, eq
279275
; CHECK-NEXT: orr w8, w0, w8
280276
; CHECK-NEXT: and w0, w8, #0x1
@@ -295,8 +291,7 @@ define i1 @test_disjoint2_inverse(i1 %0, i32 %1, i32 %2) {
295291
; CHECK-NEXT: mov w8, #1 // =0x1
296292
; CHECK-NEXT: orr w9, w2, #0x800000
297293
; CHECK-NEXT: lsl w8, w8, w1
298-
; CHECK-NEXT: bic w8, w9, w8
299-
; CHECK-NEXT: cmp w8, #0
294+
; CHECK-NEXT: bics wzr, w9, w8
300295
; CHECK-NEXT: cset w8, gt
301296
; CHECK-NEXT: orr w8, w0, w8
302297
; CHECK-NEXT: and w0, w8, #0x1
@@ -317,8 +312,7 @@ define i1 @test_disjoint3_inverse(i1 %0, i32 %1, i32 %2) {
317312
; CHECK-NEXT: mov w8, #1 // =0x1
318313
; CHECK-NEXT: orr w9, w2, #0x800000
319314
; CHECK-NEXT: lsl w8, w8, w1
320-
; CHECK-NEXT: bic w8, w9, w8
321-
; CHECK-NEXT: cmp w8, #0
315+
; CHECK-NEXT: bics wzr, w9, w8
322316
; CHECK-NEXT: cset w8, mi
323317
; CHECK-NEXT: orr w8, w0, w8
324318
; CHECK-NEXT: and w0, w8, #0x1
@@ -339,8 +333,7 @@ define i1 @test_disjoint_64(i1 %0, i64 %1, i64 %2) {
339333
; CHECK-NEXT: mov w8, #1 // =0x1
340334
; CHECK-NEXT: orr x9, x2, #0x80000000000000
341335
; CHECK-NEXT: lsl x8, x8, x1
342-
; CHECK-NEXT: and x8, x9, x8
343-
; CHECK-NEXT: cmp x8, #0
336+
; CHECK-NEXT: tst x9, x8
344337
; CHECK-NEXT: cset w8, eq
345338
; CHECK-NEXT: orr w8, w0, w8
346339
; CHECK-NEXT: and w0, w8, #0x1
@@ -360,8 +353,7 @@ define i1 @test_disjoint2_64(i1 %0, i64 %1, i64 %2) {
360353
; CHECK-NEXT: mov w8, #1 // =0x1
361354
; CHECK-NEXT: orr x9, x2, #0x80000000000000
362355
; CHECK-NEXT: lsl x8, x8, x1
363-
; CHECK-NEXT: and x8, x9, x8
364-
; CHECK-NEXT: cmp x8, #0
356+
; CHECK-NEXT: tst x9, x8
365357
; CHECK-NEXT: cset w8, gt
366358
; CHECK-NEXT: orr w8, w0, w8
367359
; CHECK-NEXT: and w0, w8, #0x1
@@ -381,8 +373,7 @@ define i1 @test_disjoint3_64(i1 %0, i64 %1, i64 %2) {
381373
; CHECK-NEXT: mov w8, #1 // =0x1
382374
; CHECK-NEXT: orr x9, x2, #0x80000000000000
383375
; CHECK-NEXT: lsl x8, x8, x1
384-
; CHECK-NEXT: and x8, x9, x8
385-
; CHECK-NEXT: cmp x8, #0
376+
; CHECK-NEXT: tst x9, x8
386377
; CHECK-NEXT: cset w8, mi
387378
; CHECK-NEXT: orr w8, w0, w8
388379
; CHECK-NEXT: and w0, w8, #0x1
@@ -445,8 +436,7 @@ define i1 @test_disjoint_inverse_64(i1 %0, i64 %1, i64 %2) {
445436
; CHECK-NEXT: mov w8, #1 // =0x1
446437
; CHECK-NEXT: orr x9, x2, #0x80000000000000
447438
; CHECK-NEXT: lsl x8, x8, x1
448-
; CHECK-NEXT: bic x8, x9, x8
449-
; CHECK-NEXT: cmp x8, #0
439+
; CHECK-NEXT: bics xzr, x9, x8
450440
; CHECK-NEXT: cset w8, eq
451441
; CHECK-NEXT: orr w8, w0, w8
452442
; CHECK-NEXT: and w0, w8, #0x1
@@ -467,8 +457,7 @@ define i1 @test_disjoint2_inverse_64(i1 %0, i64 %1, i64 %2) {
467457
; CHECK-NEXT: mov w8, #1 // =0x1
468458
; CHECK-NEXT: orr x9, x2, #0x80000000000000
469459
; CHECK-NEXT: lsl x8, x8, x1
470-
; CHECK-NEXT: bic x8, x9, x8
471-
; CHECK-NEXT: cmp x8, #0
460+
; CHECK-NEXT: bics xzr, x9, x8
472461
; CHECK-NEXT: cset w8, gt
473462
; CHECK-NEXT: orr w8, w0, w8
474463
; CHECK-NEXT: and w0, w8, #0x1
@@ -489,8 +478,7 @@ define i1 @test_disjoint3_inverse_64(i1 %0, i64 %1, i64 %2) {
489478
; CHECK-NEXT: mov w8, #1 // =0x1
490479
; CHECK-NEXT: orr x9, x2, #0x80000000000000
491480
; CHECK-NEXT: lsl x8, x8, x1
492-
; CHECK-NEXT: bic x8, x9, x8
493-
; CHECK-NEXT: cmp x8, #0
481+
; CHECK-NEXT: bics xzr, x9, x8
494482
; CHECK-NEXT: cset w8, mi
495483
; CHECK-NEXT: orr w8, w0, w8
496484
; CHECK-NEXT: and w0, w8, #0x1

0 commit comments

Comments
 (0)