Skip to content

Commit c61ae4b

Browse files
committed
[AArch64][SVE] Optimize svand_z/svorr_z with all-true predicates.
Fix redundant AND/OR operations with all-true SVE predicates that were not being optimized. Modify isAllActivePredicate to detect splat(i1 true) patterns and add IR opcode mappings for and_z/orr_z/eor_z intrinsics to enable simplification through simplifySVEIntrinsicBinOp. Fixes #160279.
1 parent fc57ca3 commit c61ae4b

File tree

2 files changed

+130
-5
lines changed

2 files changed

+130
-5
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,8 +1441,17 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::UDiv);

-  case Intrinsic::aarch64_sve_addqv:
   case Intrinsic::aarch64_sve_and_z:
+    return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+        Instruction::And);
+  case Intrinsic::aarch64_sve_orr_z:
+    return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+        Instruction::Or);
+  case Intrinsic::aarch64_sve_eor_z:
+    return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+        Instruction::Xor);
+
+  case Intrinsic::aarch64_sve_addqv:
   case Intrinsic::aarch64_sve_bic_z:
   case Intrinsic::aarch64_sve_brka_z:
   case Intrinsic::aarch64_sve_brkb_z:
@@ -1451,13 +1460,11 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_brkpb_z:
   case Intrinsic::aarch64_sve_cntp:
   case Intrinsic::aarch64_sve_compact:
-  case Intrinsic::aarch64_sve_eor_z:
   case Intrinsic::aarch64_sve_eorv:
   case Intrinsic::aarch64_sve_eorqv:
   case Intrinsic::aarch64_sve_nand_z:
   case Intrinsic::aarch64_sve_nor_z:
   case Intrinsic::aarch64_sve_orn_z:
-  case Intrinsic::aarch64_sve_orr_z:
   case Intrinsic::aarch64_sve_orv:
   case Intrinsic::aarch64_sve_orqv:
   case Intrinsic::aarch64_sve_pnext:
@@ -1587,8 +1594,21 @@ static bool isAllActivePredicate(Value *Pred) {
   if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
       cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
     Pred = UncastedPred;
-  auto *C = dyn_cast<Constant>(Pred);
-  return (C && C->isAllOnesValue());
+
+  // Also look through just convert.to.svbool if the input is an all-true splat
+  Value *ConvertArg;
+  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+                      m_Value(ConvertArg))))
+    Pred = ConvertArg;
+  // Check for splat(i1 true) pattern used by svptrue intrinsics
+  if (auto *C = dyn_cast<Constant>(Pred)) {
+    if (C->isAllOnesValue())
+      return true;
+    if (auto *SplatVal = C->getSplatValue())
+      if (auto *CI = dyn_cast<ConstantInt>(SplatVal))
+        return CI->isOne();
+  }
+  return false;
 }

 // Simplify `V` by only considering the operations that affect active lanes.
@@ -1623,6 +1643,22 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
     return &II;
   }

+  // For logical operations with all-true predicates, apply simplifications.
+  if (isAllActivePredicate(Pg)) {
+    if (Opc == Instruction::And) {
+      if (isAllActivePredicate(Op1))
+        return IC.replaceInstUsesWith(II, Op2);
+      if (isAllActivePredicate(Op2))
+        return IC.replaceInstUsesWith(II, Op1);
+    }
+    if (Opc == Instruction::Or) {
+      if (isAllActivePredicate(Op1))
+        return IC.replaceInstUsesWith(II, Op1);
+      if (isAllActivePredicate(Op2))
+        return IC.replaceInstUsesWith(II, Op2);
+    }
+  }
+
   // Only active lanes matter when simplifying the operation.
   Op1 = stripInactiveLanes(Op1, Pg);
   Op2 = stripInactiveLanes(Op2, Pg);
@@ -1643,6 +1679,15 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
   if (IInfo.inactiveLanesAreNotDefined())
     return IC.replaceInstUsesWith(II, SimpleII);

+  // For zeroing operations, if we have an all-true predicate and the result
+  // simplifies, we can just use the simplified result directly since there
+  // are no inactive lanes to worry about.
+  if (IInfo.inactiveLanesAreUnused() && isAllActivePredicate(Pg))
+    return IC.replaceInstUsesWith(II, SimpleII);
+
+  if (!IInfo.inactiveLanesTakenFromOperand())
+    return std::nullopt;
+
   Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());

   // The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
New test file (80 lines added):

; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S -o - < %s | FileCheck %s
;
; Test AArch64-specific InstCombine optimizations for SVE logical operations
; with all-true predicates.
;   - a AND true = a
;   - a OR true  = true
; Declarations for the SVE intrinsics exercised by the tests below.
declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
13+
14+
; and_z with an all-true governing predicate and an all-true second operand
; folds to the first operand (a AND true = a).
define <vscale x 16 x i1> @test_sve_and_z_all_true_right(<vscale x 16 x i1> %a) {
; CHECK-LABEL: @test_sve_and_z_all_true_right(
; CHECK-NEXT: ret <vscale x 16 x i1> [[A:%.*]]
  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
  ret <vscale x 16 x i1> %result
}
21+
22+
; Same fold with the all-true value as the first operand (true AND a = a).
define <vscale x 16 x i1> @test_sve_and_z_all_true_left(<vscale x 16 x i1> %a) {
; CHECK-LABEL: @test_sve_and_z_all_true_left(
; CHECK-NEXT: ret <vscale x 16 x i1> [[A:%.*]]
  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %result
}
29+
30+
; orr_z with an all-true governing predicate and an all-true second operand
; folds to the all-true value (a OR true = true).
define <vscale x 16 x i1> @test_sve_orr_z_all_true_right(<vscale x 16 x i1> %a) {
; CHECK-LABEL: @test_sve_orr_z_all_true_right(
; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: ret <vscale x 16 x i1> [[ALL_TRUE]]
  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
  ret <vscale x 16 x i1> %result
}
38+
39+
; Same fold with the all-true value as the first operand (true OR a = true).
define <vscale x 16 x i1> @test_sve_orr_z_all_true_left(<vscale x 16 x i1> %a) {
; CHECK-LABEL: @test_sve_orr_z_all_true_left(
; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: ret <vscale x 16 x i1> [[ALL_TRUE]]
  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %all_true, <vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %result
}
47+
48+
; Reduced reproducer from issue #160279: pnext's result is converted to
; svbool and then AND'ed with an all-true mask; the redundant and_z must
; fold away, leaving only the pnext and the conversion chain.
define <vscale x 16 x i1> @test_original_bug_case(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %prev) {
; CHECK-LABEL: @test_original_bug_case(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PREV:%.*]])
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP3]])
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %prev)
  %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %2)
  %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %3)
  %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %6 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %4, <vscale x 16 x i1> %5)
  ret <vscale x 16 x i1> %6
}
63+
64+
; Negative test: the governing predicate is not all-true, so the and_z must
; NOT be folded even though one operand is all-true.
define <vscale x 16 x i1> @test_sve_and_z_not_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a) {
; CHECK-LABEL: @test_sve_and_z_not_all_true_predicate(
; CHECK-NEXT: [[ALL_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[ALL_TRUE]])
; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
  %all_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %a, <vscale x 16 x i1> %all_true)
  ret <vscale x 16 x i1> %result
}
73+
74+
; Negative test: neither data operand is all-true, so even with an all-true
; governing predicate the and_z must be kept.
define <vscale x 16 x i1> @test_sve_and_z_no_all_true_operands(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: @test_sve_and_z_no_all_true_operands(
; CHECK-NEXT: [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
; CHECK-NEXT: ret <vscale x 16 x i1> [[RESULT]]
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %result
}

0 commit comments

Comments
 (0)