Skip to content

Commit b201a2f

Browse files
committed
[AArch64][SVE] Optimize logical ops with convert.to.svbool
When both operands of a logical operation (and/or/xor) are convert.to.svbool conversions from the same narrower predicate type, unwrap to that type, simplify using simplifyBinOp, and rewrap the result. This eliminates redundant instructions in cases such as: svand_z(svptrue_b8(), svpnext_b16(prev, pg), svptrue_b16()). Fixes #160279.
1 parent 66b4815 commit b201a2f

File tree

2 files changed

+157
-3
lines changed

2 files changed

+157
-3
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,8 +1469,17 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
14691469
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
14701470
Instruction::UDiv);
14711471

1472-
case Intrinsic::aarch64_sve_addqv:
14731472
case Intrinsic::aarch64_sve_and_z:
1473+
return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
1474+
Instruction::And);
1475+
case Intrinsic::aarch64_sve_orr_z:
1476+
return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
1477+
Instruction::Or);
1478+
case Intrinsic::aarch64_sve_eor_z:
1479+
return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
1480+
Instruction::Xor);
1481+
1482+
case Intrinsic::aarch64_sve_addqv:
14741483
case Intrinsic::aarch64_sve_bic_z:
14751484
case Intrinsic::aarch64_sve_brka_z:
14761485
case Intrinsic::aarch64_sve_brkb_z:
@@ -1479,13 +1488,11 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
14791488
case Intrinsic::aarch64_sve_brkpb_z:
14801489
case Intrinsic::aarch64_sve_cntp:
14811490
case Intrinsic::aarch64_sve_compact:
1482-
case Intrinsic::aarch64_sve_eor_z:
14831491
case Intrinsic::aarch64_sve_eorv:
14841492
case Intrinsic::aarch64_sve_eorqv:
14851493
case Intrinsic::aarch64_sve_nand_z:
14861494
case Intrinsic::aarch64_sve_nor_z:
14871495
case Intrinsic::aarch64_sve_orn_z:
1488-
case Intrinsic::aarch64_sve_orr_z:
14891496
case Intrinsic::aarch64_sve_orv:
14901497
case Intrinsic::aarch64_sve_orqv:
14911498
case Intrinsic::aarch64_sve_pnext:
@@ -1659,6 +1666,27 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
16591666
return ⅈ
16601667
}
16611668

1669+
// If both operands are convert.to.svbool from the same narrower type, try to
1670+
// simplify the operation at that narrower type first.
1671+
if (isAllActivePredicate(Pg)) {
1672+
auto *ConvIntr1 = dyn_cast<IntrinsicInst>(Op1);
1673+
auto *ConvIntr2 = dyn_cast<IntrinsicInst>(Op2);
1674+
if (ConvIntr1 && ConvIntr2 &&
1675+
ConvIntr1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
1676+
ConvIntr2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool) {
1677+
Value *NarrowOp1 = ConvIntr1->getArgOperand(0);
1678+
Value *NarrowOp2 = ConvIntr2->getArgOperand(0);
1679+
if (NarrowOp1->getType() == NarrowOp2->getType()) {
1680+
if (Value *SimplifiedNarrow = simplifyBinOp(Opc, NarrowOp1, NarrowOp2, DL)) {
1681+
Value *NewConv = IC.Builder.CreateIntrinsic(
1682+
Intrinsic::aarch64_sve_convert_to_svbool,
1683+
{SimplifiedNarrow->getType()}, {SimplifiedNarrow});
1684+
return IC.replaceInstUsesWith(II, NewConv);
1685+
}
1686+
}
1687+
}
1688+
}
1689+
16621690
// Only active lanes matter when simplifying the operation.
16631691
Op1 = stripInactiveLanes(Op1, Pg);
16641692
Op2 = stripInactiveLanes(Op2, Pg);
@@ -1679,6 +1707,9 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
16791707
if (IInfo.inactiveLanesAreNotDefined())
16801708
return IC.replaceInstUsesWith(II, SimpleII);
16811709

1710+
if (!IInfo.inactiveLanesTakenFromOperand())
1711+
return std::nullopt;
1712+
16821713
Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
16831714

16841715
// The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S < %s | FileCheck %s
3+
;
4+
; Test AArch64-specific InstCombine optimizations for SVE logical operations
5+
; with all-true predicates.
6+
; - a AND true = a
7+
; - a OR true = true
8+
9+
declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
10+
declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
11+
declare <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
12+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
13+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
14+
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
15+
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
16+
declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
17+
18+
define <vscale x 16 x i1> @test_and_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
19+
; CHECK-LABEL: @test_and_convert_all_true_right_b16(
20+
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
21+
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
22+
;
23+
%conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
24+
%conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
25+
%result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
26+
ret <vscale x 16 x i1> %result
27+
}
28+
29+
; Same as above, but with the all-true operand on the LHS: the fold is
; commutative, so again only the conversion of %x remains.
define <vscale x 16 x i1> @test_and_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
; CHECK-LABEL: @test_and_convert_all_true_left_b16(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
  ret <vscale x 16 x i1> %result
}
39+
40+
; OR with an all-true RHS folds to the all-true value: the result is the
; conversion of an all-true nxv8i1 predicate.
define <vscale x 16 x i1> @test_or_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
; CHECK-LABEL: @test_or_convert_all_true_right_b16(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
  ret <vscale x 16 x i1> %result
}
50+
51+
; Same as above with the all-true operand on the LHS: still folds to the
; conversion of an all-true nxv8i1 predicate.
define <vscale x 16 x i1> @test_or_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
; CHECK-LABEL: @test_or_convert_all_true_left_b16(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
  ret <vscale x 16 x i1> %result
}
61+
; The fold also applies at b32 granularity (operands converted from nxv4i1).
define <vscale x 16 x i1> @test_and_convert_all_true_b32(<vscale x 4 x i1> %x) {
; CHECK-LABEL: @test_and_convert_all_true_b32(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[X:%.*]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %x)
  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
  ret <vscale x 16 x i1> %result
}
71+
72+
; The fold also applies at b64 granularity (operands converted from nxv2i1).
define <vscale x 16 x i1> @test_and_convert_all_true_b64(<vscale x 2 x i1> %x) {
; CHECK-LABEL: @test_and_convert_all_true_b64(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[X:%.*]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %x)
  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
  ret <vscale x 16 x i1> %result
}
82+
83+
; Negative test
84+
; The operands are converted from different narrower types (nxv8i1 vs
; nxv4i1), so no common narrow type exists and nothing is folded.
define <vscale x 16 x i1> @test_and_convert_different_granularities(<vscale x 8 x i1> %x, <vscale x 4 x i1> %y) {
; CHECK-LABEL: @test_and_convert_different_granularities(
; CHECK-NEXT:    [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
; CHECK-NEXT:    [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[Y:%.*]])
; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %y)
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
  ret <vscale x 16 x i1> %result
}
96+
97+
; Negative test
98+
; The governing predicate is not known to be all-true, so the narrow-type
; simplification does not apply and the operation is kept.
define <vscale x 16 x i1> @test_and_convert_non_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 8 x i1> %x) {
; CHECK-LABEL: @test_and_convert_non_all_true_predicate(
; CHECK-NEXT:    [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
; CHECK-NEXT:    [[CONV_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_TRUE]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
  ret <vscale x 16 x i1> %result
}
110+
111+
; Negative test
112+
; Neither narrow operand is a constant, so simplifyBinOp has nothing to fold
; and the operation is kept unchanged.
define <vscale x 16 x i1> @test_and_convert_no_all_true(<vscale x 8 x i1> %x, <vscale x 8 x i1> %y) {
; CHECK-LABEL: @test_and_convert_no_all_true(
; CHECK-NEXT:    [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
; CHECK-NEXT:    [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[Y:%.*]])
; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
;
  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %y)
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
  ret <vscale x 16 x i1> %result
}

0 commit comments

Comments
 (0)