From da65767749017c0d2e3cf6dd3996ee317a490e74 Mon Sep 17 00:00:00 2001
From: Vladimir Miloserdov
Date: Tue, 23 Sep 2025 23:07:05 +0100
Subject: [PATCH] [AArch64][SVE] Optimize logical ops with convert.to.svbool

When both operands of a predicated logical operation (and/or/xor) with an
all-active governing predicate are convert.to.svbool calls from the same
narrower predicate type, unwrap the operands to that type, simplify the
operation using simplifyBinOp, and rewrap the result. This eliminates
redundant instructions in cases like:

  svand_z(svptrue_b8(), svpnext_b16(prev, pg), svptrue_b16());

Fixes #160279.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  40 +++++-
 .../sve-intrinsic-and-or-with-all-true.ll     | 123 ++++++++++++++++++
 2 files changed, 160 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index fede586cf35bc..24bad469d251e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1469,8 +1469,17 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::UDiv);
 
-  case Intrinsic::aarch64_sve_addqv:
   case Intrinsic::aarch64_sve_and_z:
+    return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+        Instruction::And);
+  case Intrinsic::aarch64_sve_orr_z:
+    return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+        Instruction::Or);
+  case Intrinsic::aarch64_sve_eor_z:
+    return SVEIntrinsicInfo::defaultZeroingOp().setMatchingIROpcode(
+        Instruction::Xor);
+
+  case Intrinsic::aarch64_sve_addqv:
   case Intrinsic::aarch64_sve_bic_z:
   case Intrinsic::aarch64_sve_brka_z:
   case Intrinsic::aarch64_sve_brkb_z:
@@ -1479,13 +1488,11 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_brkpb_z:
   case Intrinsic::aarch64_sve_cntp:
   case Intrinsic::aarch64_sve_compact:
-  case Intrinsic::aarch64_sve_eor_z:
   case Intrinsic::aarch64_sve_eorv:
   case Intrinsic::aarch64_sve_eorqv:
   case Intrinsic::aarch64_sve_nand_z:
   case Intrinsic::aarch64_sve_nor_z:
   case Intrinsic::aarch64_sve_orn_z:
-  case Intrinsic::aarch64_sve_orr_z:
   case Intrinsic::aarch64_sve_orv:
   case Intrinsic::aarch64_sve_orqv:
   case Intrinsic::aarch64_sve_pnext:
@@ -1659,6 +1666,30 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
     return &II;
   }
 
+  // If both operands are convert.to.svbool from the same narrower type, try to
+  // simplify the operation at that narrower type first.
+  if (isAllActivePredicate(Pg)) {
+    auto *ConvIntr1 = dyn_cast<IntrinsicInst>(Op1);
+    auto *ConvIntr2 = dyn_cast<IntrinsicInst>(Op2);
+    if (ConvIntr1 && ConvIntr2 &&
+        ConvIntr1->getIntrinsicID() ==
+            Intrinsic::aarch64_sve_convert_to_svbool &&
+        ConvIntr2->getIntrinsicID() ==
+            Intrinsic::aarch64_sve_convert_to_svbool) {
+      Value *NarrowOp1 = ConvIntr1->getArgOperand(0);
+      Value *NarrowOp2 = ConvIntr2->getArgOperand(0);
+      if (NarrowOp1->getType() == NarrowOp2->getType()) {
+        if (Value *SimplifiedNarrow =
+                simplifyBinOp(Opc, NarrowOp1, NarrowOp2, DL)) {
+          Value *NewConv = IC.Builder.CreateIntrinsic(
+              Intrinsic::aarch64_sve_convert_to_svbool,
+              {SimplifiedNarrow->getType()}, {SimplifiedNarrow});
+          return IC.replaceInstUsesWith(II, NewConv);
+        }
+      }
+    }
+  }
+
   // Only active lanes matter when simplifying the operation.
   Op1 = stripInactiveLanes(Op1, Pg);
   Op2 = stripInactiveLanes(Op2, Pg);
@@ -1679,6 +1710,9 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
   if (IInfo.inactiveLanesAreNotDefined())
     return IC.replaceInstUsesWith(II, SimpleII);
 
+  if (!IInfo.inactiveLanesTakenFromOperand())
+    return std::nullopt;
+
   Value *Inactive = II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom());
 
   // The intrinsic does nothing (e.g. sve.mul(pg, A, 1.0)).
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
new file mode 100644
index 0000000000000..f214fa5872b9e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-and-or-with-all-true.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool ../../llvm-build/bin/opt
+; RUN: opt -passes=instcombine -mtriple aarch64 -mattr=+sve -S < %s | FileCheck %s
+;
+; Test AArch64-specific InstCombine optimizations for SVE logical operations
+; with all-true predicates.
+; - a AND true = a
+; - a OR true = true
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+
+define <vscale x 16 x i1> @test_and_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_right_b16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_and_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_left_b16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_or_convert_all_true_right_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_or_convert_all_true_right_b16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_or_convert_all_true_left_b16(<vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_or_convert_all_true_left_b16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_true, <vscale x 16 x i1> %conv_x)
+  ret <vscale x 16 x i1> %result
+}
+define <vscale x 16 x i1> @test_and_convert_all_true_b32(<vscale x 4 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_b32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[X:%.*]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %x)
+  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+  ret <vscale x 16 x i1> %result
+}
+
+define <vscale x 16 x i1> @test_and_convert_all_true_b64(<vscale x 2 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_all_true_b64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[X:%.*]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %x)
+  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+  ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_different_granularities(<vscale x 8 x i1> %x, <vscale x 4 x i1> %y) {
+; CHECK-LABEL: @test_and_convert_different_granularities(
+; CHECK-NEXT:    [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT:    [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[Y:%.*]])
+; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+  %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %y)
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
+  ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_non_all_true_predicate(<vscale x 16 x i1> %pred, <vscale x 8 x i1> %x) {
+; CHECK-LABEL: @test_and_convert_non_all_true_predicate(
+; CHECK-NEXT:    [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT:    [[CONV_TRUE:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_TRUE]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pred, <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)
+  ret <vscale x 16 x i1> %result
+}
+
+; Negative test
+define <vscale x 16 x i1> @test_and_convert_no_all_true(<vscale x 8 x i1> %x, <vscale x 8 x i1> %y) {
+; CHECK-LABEL: @test_and_convert_no_all_true(
+; CHECK-NEXT:    [[CONV_X:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[X:%.*]])
+; CHECK-NEXT:    [[CONV_Y:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[Y:%.*]])
+; CHECK-NEXT:    [[RESULT:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> [[CONV_X]], <vscale x 16 x i1> [[CONV_Y]])
+; CHECK-NEXT:    ret <vscale x 16 x i1> [[RESULT]]
+;
+  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
+  %conv_y = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %y)
+  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_y)
+  ret <vscale x 16 x i1> %result
+}
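
For reference (a restatement outside the diff itself, taken from the b16 tests above and the new combine, not additional patch content): with an all-active governing predicate and both and.z operands produced by convert.to.svbool from the same narrower type, the narrow 'and' against an all-true splat is folded by simplifyBinOp, so

  %conv_x = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)
  %conv_true = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
  %result = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> %conv_x, <vscale x 16 x i1> %conv_true)

reduces to a single conversion of the original narrow predicate:

  %result = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %x)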