Skip to content

Commit 4a64024

Browse files
committed
[AArch64] Fold SUBS with ugt/ult into ANDS when the second operand is a mask
Alive2 proof: https://alive2.llvm.org/ce/z/pLhHI9. Fixes: llvm/llvm-project#59598. Reviewed By: samtebbs. Differential Revision: https://reviews.llvm.org/D141829
1 parent cc526e3 commit 4a64024

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19813,6 +19813,49 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
1981319813
return false;
1981419814
}
1981519815

19816+
// (X & C) >u Mask --> (X & (C & (~Mask)) != 0
19817+
// (X & C) <u Mask --> (X & (C & (~Mask)) == 0
19818+
static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode,
19819+
SDNode *AndNode, SelectionDAG &DAG,
19820+
unsigned CCIndex, unsigned CmpIndex,
19821+
unsigned CC) {
19822+
if (CC != AArch64CC::HI && CC != AArch64CC::LO)
19823+
return SDValue();
19824+
19825+
ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
19826+
if (!AndC)
19827+
return SDValue();
19828+
19829+
ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
19830+
if (!SubsC)
19831+
return SDValue();
19832+
19833+
APInt SubsAP = SubsC->getAPIntValue();
19834+
if (!SubsAP.isMask())
19835+
return SDValue();
19836+
19837+
SDLoc DL(N);
19838+
APInt AndSMask = (~SubsAP) & AndC->getAPIntValue();
19839+
SDValue ANDS = DAG.getNode(
19840+
AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0),
19841+
DAG.getConstant(AndSMask, DL, SubsC->getValueType(0)));
19842+
SDValue AArch64_CC =
19843+
DAG.getConstant(CC == AArch64CC::HI ? AArch64CC::NE : AArch64CC::EQ, DL,
19844+
N->getOperand(CCIndex)->getValueType(0));
19845+
19846+
// For now, only performCSELCombine and performBRCONDCombine call this
19847+
// function. And both of them pass 2 for CCIndex, 3 for CmpIndex with 4
19848+
// operands. So just init the ops direct to simplify the code. If we have some
19849+
// other case with different CCIndex, CmpIndex, we need to use for loop to
19850+
// rewrite the code here.
19851+
// TODO: Do we need to assert number of operand is 4 here?
19852+
assert((CCIndex == 2 && CmpIndex == 3) &&
19853+
"Expected CCIndex to be 2 and CmpIndex to be 3.");
19854+
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
19855+
ANDS.getValue(1)};
19856+
return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops);
19857+
}
19858+
1981619859
static
1981719860
SDValue performCONDCombine(SDNode *N,
1981819861
TargetLowering::DAGCombinerInfo &DCI,
@@ -19834,6 +19877,10 @@ SDValue performCONDCombine(SDNode *N,
1983419877
if (AndNode->getOpcode() != ISD::AND)
1983519878
return SDValue();
1983619879

19880+
if (SDValue Val = performSubsToAndsCombine(N, SubsNode, AndNode, DAG, CCIndex,
19881+
CmpIndex, CC))
19882+
return Val;
19883+
1983719884
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
1983819885
uint32_t CNV = CN->getZExtValue();
1983919886
if (CNV == 255)

llvm/test/CodeGen/AArch64/andcompare.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2401,5 +2401,86 @@ entry:
24012401
%z = zext i1 %a to i32
24022402
ret i32 %z
24032403
}
2404+
2405+
; icmp ugt (and %num, 255), 1 feeding a select of the masked value.
; The SDISEL checks expect the compare to become `tst w0, #0xfe`
; (0xfe == 255 & ~1) with an `ne` select; the GISEL checks keep the
; `cmp w8, #1` / `hi` sequence.
define i32 @cmp_to_ands1(i32 %num) {
2406+
; SDISEL-LABEL: cmp_to_ands1:
2407+
; SDISEL: // %bb.0:
2408+
; SDISEL-NEXT: and w8, w0, #0xff
2409+
; SDISEL-NEXT: tst w0, #0xfe
2410+
; SDISEL-NEXT: csel w0, w8, wzr, ne
2411+
; SDISEL-NEXT: ret
2412+
;
2413+
; GISEL-LABEL: cmp_to_ands1:
2414+
; GISEL: // %bb.0:
2415+
; GISEL-NEXT: and w8, w0, #0xff
2416+
; GISEL-NEXT: cmp w8, #1
2417+
; GISEL-NEXT: csel w0, w8, wzr, hi
2418+
; GISEL-NEXT: ret
2419+
%and = and i32 %num, 255
2420+
%cmp = icmp ugt i32 %and, 1
2421+
%r = select i1 %cmp, i32 %and, i32 0
2422+
ret i32 %r
2423+
}
2424+
2425+
; icmp ugt (and %num, 254), 63 feeding a select of the masked value.
; The SDISEL checks expect `tst w0, #0xc0` (0xc0 == 254 & ~63) with an `ne`
; select; the GISEL checks keep the `cmp w8, #63` / `hi` sequence.
define i32 @cmp_to_ands2(i32 %num) {
2426+
; SDISEL-LABEL: cmp_to_ands2:
2427+
; SDISEL: // %bb.0:
2428+
; SDISEL-NEXT: and w8, w0, #0xfe
2429+
; SDISEL-NEXT: tst w0, #0xc0
2430+
; SDISEL-NEXT: csel w0, w8, wzr, ne
2431+
; SDISEL-NEXT: ret
2432+
;
2433+
; GISEL-LABEL: cmp_to_ands2:
2434+
; GISEL: // %bb.0:
2435+
; GISEL-NEXT: and w8, w0, #0xfe
2436+
; GISEL-NEXT: cmp w8, #63
2437+
; GISEL-NEXT: csel w0, w8, wzr, hi
2438+
; GISEL-NEXT: ret
2439+
%and = and i32 %num, 254
2440+
%cmp = icmp ugt i32 %and, 63
2441+
%r = select i1 %cmp, i32 %and, i32 0
2442+
ret i32 %r
2443+
}
2444+
2445+
; icmp ugt (and %num, 23), 7 where the select returns %a rather than the
; masked value, so the AND result is used only by the compare.
; The SDISEL checks expect just `tst w0, #0x10` (0x10 == 23 & ~7) with an
; `ne` select; the GISEL checks materialize 23, AND, and `cmp w8, #7` / `hi`.
define i32 @cmp_to_ands3(i32 %num, i32 %a) {
2446+
; SDISEL-LABEL: cmp_to_ands3:
2447+
; SDISEL: // %bb.0:
2448+
; SDISEL-NEXT: tst w0, #0x10
2449+
; SDISEL-NEXT: csel w0, w1, wzr, ne
2450+
; SDISEL-NEXT: ret
2451+
;
2452+
; GISEL-LABEL: cmp_to_ands3:
2453+
; GISEL: // %bb.0:
2454+
; GISEL-NEXT: mov w8, #23
2455+
; GISEL-NEXT: and w8, w0, w8
2456+
; GISEL-NEXT: cmp w8, #7
2457+
; GISEL-NEXT: csel w0, w1, wzr, hi
2458+
; GISEL-NEXT: ret
2459+
%and = and i32 %num, 23
2460+
%cmp = icmp ugt i32 %and, 7
2461+
%r = select i1 %cmp, i32 %a, i32 0
2462+
ret i32 %r
2463+
}
2464+
2465+
; icmp ult (and %num, 48), 31 feeding a select between the masked value and %a.
; The SDISEL checks expect `tst w0, #0x20` (0x20 == 48 & ~31) with an `eq`
; select; the GISEL checks keep the `cmp w8, #31` / `lo` sequence.
; Note that 48 & 31 != 31, so the masked value can never equal 31 here.
define i32 @cmp_to_ands4(i32 %num, i32 %a) {
2466+
; SDISEL-LABEL: cmp_to_ands4:
2467+
; SDISEL: // %bb.0:
2468+
; SDISEL-NEXT: and w8, w0, #0x30
2469+
; SDISEL-NEXT: tst w0, #0x20
2470+
; SDISEL-NEXT: csel w0, w8, w1, eq
2471+
; SDISEL-NEXT: ret
2472+
;
2473+
; GISEL-LABEL: cmp_to_ands4:
2474+
; GISEL: // %bb.0:
2475+
; GISEL-NEXT: and w8, w0, #0x30
2476+
; GISEL-NEXT: cmp w8, #31
2477+
; GISEL-NEXT: csel w0, w8, w1, lo
2478+
; GISEL-NEXT: ret
2479+
%and = and i32 %num, 48
2480+
%cmp = icmp ult i32 %and, 31
2481+
%r = select i1 %cmp, i32 %and, i32 %a
2482+
ret i32 %r
2483+
}
2484+
24042485
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
24052486
; CHECK: {{.*}}

0 commit comments

Comments
 (0)