Commit 1e10b78

[AArch64][InstCombine] Canonicalize whilelo intrinsic (#151553)
InstCombine llvm.aarch64.sve.whilelo to the generic LLVM llvm.get.active.lane.mask intrinsic
1 parent: b936a7c

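As a quick illustration, the combine rewrites the AArch64-specific SVE intrinsic into its generic equivalent, keeping the same predicate result type and operand types. A minimal before/after sketch in LLVM IR (the function name @example and its argument names are made up for illustration; the real coverage is the new test file below):

; Before -passes=instcombine: the target-specific SVE intrinsic.
define <vscale x 4 x i1> @example(i64 %index, i64 %limit) {
  %mask = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %index, i64 %limit)
  ret <vscale x 4 x i1> %mask
}

; After -passes=instcombine: the generic lane-mask intrinsic, with the
; predicate type and operand type carried over unchanged.
define <vscale x 4 x i1> @example(i64 %index, i64 %limit) {
  %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %index, i64 %limit)
  ret <vscale x 4 x i1> %mask
}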
2 files changed, +113 -0 lines changed
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 11 additions & 0 deletions
@@ -2747,6 +2747,15 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *> instCombineWhilelo(InstCombiner &IC,
+                                                       IntrinsicInst &II) {
+  return IC.replaceInstUsesWith(
+      II,
+      IC.Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
+                                 {II.getType(), II.getOperand(0)->getType()},
+                                 {II.getOperand(0), II.getOperand(1)}));
+}
+
 static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
                                                      IntrinsicInst &II) {
   if (match(II.getOperand(0), m_ConstantInt<AArch64SVEPredPattern::all>()))
@@ -2883,6 +2892,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEDupqLane(IC, II);
   case Intrinsic::aarch64_sve_insr:
     return instCombineSVEInsr(IC, II);
+  case Intrinsic::aarch64_sve_whilelo:
+    return instCombineWhilelo(IC, II);
   case Intrinsic::aarch64_sve_ptrue:
     return instCombinePTrue(IC, II);
   case Intrinsic::aarch64_sve_uxtb:
New test file: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 2 x i1> @whilelo_nxv2i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 2 x i1> @whilelo_nxv2i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %a, i32 %b)
+  ret <vscale x 2 x i1> %mask
+}
+
+define <vscale x 4 x i1> @whilelo_nxv4i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 4 x i1> @whilelo_nxv4i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %a, i32 %b)
+  ret <vscale x 4 x i1> %mask
+}
+
+define <vscale x 8 x i1> @whilelo_nxv8i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 8 x i1> @whilelo_nxv8i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %a, i32 %b)
+  ret <vscale x 8 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %a, i32 %b)
+  ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 2 x i1> @whilelo_nxv2i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 2 x i1> @whilelo_nxv2i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)
+  ret <vscale x 2 x i1> %mask
+}
+
+define <vscale x 4 x i1> @whilelo_nxv4i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 4 x i1> @whilelo_nxv4i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %a, i64 %b)
+  ret <vscale x 4 x i1> %mask
+}
+
+define <vscale x 8 x i1> @whilelo_nxv8i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 8 x i1> @whilelo_nxv8i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    ret <vscale x 8 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %a, i64 %b)
+  ret <vscale x 8 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %a, i64 %b)
+  ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i64_const() {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i64_const() {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 16)
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 16)
+  ret <vscale x 16 x i1> %mask
+}
+
+define <vscale x 16 x i1> @whilelo_nxv16i1.i32_const() {
+; CHECK-LABEL: define <vscale x 16 x i1> @whilelo_nxv16i1.i32_const() {
+; CHECK-NEXT:    [[MASK:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 16)
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[MASK]]
+;
+  %mask = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 0, i32 16)
+  ret <vscale x 16 x i1> %mask
+}
