Skip to content

Commit 4d88bb6

Browse files
authored
[Hexagon] Implement isMaskAndCmp0FoldingBeneficial (llvm#166891)
Sink the `and` mask into the block of its `cmp` user when it masks a single bit, since the `and`/`cmp`/`br` sequence then folds into a single `tstbit` instruction.
1 parent cf1f871 commit 4d88bb6

File tree

3 files changed

+80
-0
lines changed

3 files changed

+80
-0
lines changed

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
39483948
AtomicCmpXchgInst *AI) const {
39493949
return AtomicExpansionKind::LLSC;
39503950
}
3951+
3952+
bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
3953+
const Instruction &AndI) const {
3954+
// Only report sinking the 'and' into the block of its cmp user as
3955+
// beneficial when the mask is a constant single bit (and/cmp/br -> tstbit).
3956+
ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
3957+
if (!Mask)
3958+
return false;
3959+
return Mask->getValue().isPowerOf2();
3960+
}

llvm/lib/Target/Hexagon/HexagonISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ class HexagonTargetLowering : public TargetLowering {
160160

161161
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
162162

163+
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
164+
163165
/// Return true if an FMA operation is faster than a pair of mul and add
164166
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
165167
/// method returns true (and FMAs are legal), otherwise fmuladd is
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; Test that 'and' mask is sunk to the cmp use block only if it is masking a single bit
3+
; RUN: llc -march=hexagon --verify-machineinstrs < %s | FileCheck %s
4+
5+
@A = global i32 zeroinitializer
6+
7+
; The mask 2048 has a single bit set (bit 11), so the expected code tests it with tstbit.
define i32 @and_sink1(i32 %a) {
8+
; CHECK-LABEL: and_sink1:
9+
; CHECK: .cfi_startproc
10+
; CHECK-NEXT: // %bb.0:
11+
; CHECK-NEXT: {
12+
; CHECK-NEXT: p0 = !tstbit(r0,#11)
13+
; CHECK-NEXT: r0 = ##A
14+
; CHECK-NEXT: }
15+
; CHECK-NEXT: .p2align 4
16+
; CHECK-NEXT: .LBB0_1: // %bb0
17+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
18+
; CHECK-NEXT: {
19+
; CHECK-NEXT: if (p0) jump:nt .LBB0_1
20+
; CHECK-NEXT: memw(r0+#0) = #0
21+
; CHECK-NEXT: }
22+
; CHECK-NEXT: // %bb.2: // %bb2
23+
; CHECK-NEXT: {
24+
; CHECK-NEXT: r0 = #0
25+
; CHECK-NEXT: jumpr r31
26+
; CHECK-NEXT: }
27+
%and = and i32 %a, 2048
28+
br label %bb0
29+
bb0:
30+
%cmp = icmp eq i32 %and, 0
31+
store i32 0, i32* @A
32+
br i1 %cmp, label %bb0, label %bb2
33+
bb2:
34+
ret i32 0
35+
}
36+
37+
; The mask 2049 has two bits set, so the expected code keeps a separate and followed by cmp.eq.
define i32 @and_sink2(i32 %a) {
38+
; CHECK-LABEL: and_sink2:
39+
; CHECK: .cfi_startproc
40+
; CHECK-NEXT: // %bb.0:
41+
; CHECK-NEXT: {
42+
; CHECK-NEXT: r1 = and(r0,##2049)
43+
; CHECK-NEXT: r0 = ##A
44+
; CHECK-NEXT: }
45+
; CHECK-NEXT: {
46+
; CHECK-NEXT: p0 = cmp.eq(r1,#0)
47+
; CHECK-NEXT: }
48+
; CHECK-NEXT: .p2align 4
49+
; CHECK-NEXT: .LBB1_1: // %bb0
50+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
51+
; CHECK-NEXT: {
52+
; CHECK-NEXT: if (p0) jump:nt .LBB1_1
53+
; CHECK-NEXT: memw(r0+#0) = #0
54+
; CHECK-NEXT: }
55+
; CHECK-NEXT: // %bb.2: // %bb2
56+
; CHECK-NEXT: {
57+
; CHECK-NEXT: r0 = #0
58+
; CHECK-NEXT: jumpr r31
59+
; CHECK-NEXT: }
60+
%and = and i32 %a, 2049
61+
br label %bb0
62+
bb0:
63+
%cmp = icmp eq i32 %and, 0
64+
store i32 0, i32* @A
65+
br i1 %cmp, label %bb0, label %bb2
66+
bb2:
67+
ret i32 0
68+
}

0 commit comments

Comments
 (0)