Skip to content

Commit 7b12a08

Browse files
authored
[AArch64] Allow peephole to optimize AND + signed compare with 0 (#153608)
This should be the peephole's job. Because ANDS always sets the V flag to 0, signed comparisons of its result with 0 can safely be replaced with TST. Note that this applies only to AArch64, because ANDS on ARM leaves the V flag unchanged. Fixes: #154387
1 parent d5125b3 commit 7b12a08

File tree

3 files changed

+434
-1
lines changed

3 files changed

+434
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1780,6 +1780,16 @@ static unsigned sForm(MachineInstr &Instr) {
17801780
case AArch64::SUBSWri:
17811781
case AArch64::SUBSXrr:
17821782
case AArch64::SUBSXri:
1783+
case AArch64::ANDSWri:
1784+
case AArch64::ANDSWrr:
1785+
case AArch64::ANDSWrs:
1786+
case AArch64::ANDSXri:
1787+
case AArch64::ANDSXrr:
1788+
case AArch64::ANDSXrs:
1789+
case AArch64::BICSWrr:
1790+
case AArch64::BICSXrr:
1791+
case AArch64::BICSWrs:
1792+
case AArch64::BICSXrs:
17831793
return Instr.getOpcode();
17841794

17851795
case AArch64::ADDWrr:
@@ -1810,6 +1820,22 @@ static unsigned sForm(MachineInstr &Instr) {
18101820
return AArch64::ANDSWri;
18111821
case AArch64::ANDXri:
18121822
return AArch64::ANDSXri;
1823+
case AArch64::ANDWrr:
1824+
return AArch64::ANDSWrr;
1825+
case AArch64::ANDWrs:
1826+
return AArch64::ANDSWrs;
1827+
case AArch64::ANDXrr:
1828+
return AArch64::ANDSXrr;
1829+
case AArch64::ANDXrs:
1830+
return AArch64::ANDSXrs;
1831+
case AArch64::BICWrr:
1832+
return AArch64::BICSWrr;
1833+
case AArch64::BICXrr:
1834+
return AArch64::BICSXrr;
1835+
case AArch64::BICWrs:
1836+
return AArch64::BICSWrs;
1837+
case AArch64::BICXrs:
1838+
return AArch64::BICSXrs;
18131839
}
18141840
}
18151841

@@ -1947,6 +1973,25 @@ static bool isSUBSRegImm(unsigned Opcode) {
19471973
return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
19481974
}
19491975

1976+
/// Return true if \p MI is a bitwise AND or BIC instruction, in either its
/// plain or its flag-setting form.
///
/// The opcode is first normalised through sForm(), so only the flag-setting
/// (ANDS/BICS) opcodes have to be listed here.
static bool isANDOpcode(MachineInstr &MI) {
  switch (sForm(MI)) {
  default:
    return false;

  // ANDS: immediate, register and shifted-register forms.
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::ANDSWrs:
  case AArch64::ANDSXrs:
  // BICS: register and shifted-register forms.
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::BICSWrs:
  case AArch64::BICSXrs:
    return true;
  }
}
1994+
19501995
/// Check if CmpInstr can be substituted by MI.
19511996
///
19521997
/// CmpInstr can be substituted:
@@ -1984,7 +2029,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
19842029
// 1) MI and CmpInstr set N and V to the same value.
19852030
// 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
19862031
// signed overflow occurs, so CmpInstr could still be simplified away.
1987-
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
2032+
// Note that ANDS and BICS instructions always clear the V flag.
2033+
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
19882034
return false;
19892035

19902036
AccessKind AccessToCheck = AK_Write;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt -o - %s | FileCheck %s
3+
--- |
4+
define i32 @test01() nounwind {
5+
entry:
6+
%0 = select i1 true, i32 1, i32 0
7+
%1 = and i32 %0, 65535
8+
%2 = icmp sgt i32 %1, 0
9+
br i1 %2, label %if.then, label %if.end
10+
11+
if.then: ; preds = %entry
12+
ret i32 1
13+
14+
if.end: ; preds = %entry
15+
ret i32 0
16+
}
17+
...
18+
---
19+
name: test01
20+
registers:
21+
- { id: 0, class: gpr32 }
22+
- { id: 1, class: gpr32common }
23+
body: |
24+
; CHECK-LABEL: name: test01
25+
; CHECK: bb.0.entry:
26+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
29+
; CHECK-NEXT: [[ANDSWri:%[0-9]+]]:gpr32common = ANDSWri killed [[ANDSWri]], 15, implicit-def $nzcv
30+
; CHECK-NEXT: Bcc 12, %bb.2, implicit $nzcv
31+
; CHECK-NEXT: {{ $}}
32+
; CHECK-NEXT: bb.1.if.then:
33+
; CHECK-NEXT: $w0 = MOVi32imm 1
34+
; CHECK-NEXT: RET_ReallyLR implicit $w0
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: bb.2.if.end:
37+
; CHECK-NEXT: $w0 = MOVi32imm 0
38+
; CHECK-NEXT: RET_ReallyLR implicit $w0
39+
bb.0.entry:
40+
successors: %bb.2.if.end, %bb.1.if.then
41+
42+
%0 = MOVi32imm 1
43+
%1 = ANDWri killed %1, 15
44+
$wzr = SUBSWri killed %1, 0, 0, implicit-def $nzcv
45+
Bcc 12, %bb.2.if.end, implicit $nzcv
46+
47+
bb.1.if.then:
48+
$w0 = MOVi32imm 1
49+
RET_ReallyLR implicit $w0
50+
51+
bb.2.if.end:
52+
$w0 = MOVi32imm 0
53+
RET_ReallyLR implicit $w0
54+
55+
...

0 commit comments

Comments
 (0)