diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 594f1bff5c458..bab47a14d380c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2640,6 +2640,26 @@ static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *> instCombineSVEUxt(InstCombiner &IC,
+                                                      IntrinsicInst &II,
+                                                      unsigned NumBits) {
+  Value *Passthru = II.getOperand(0);
+  Value *Pg = II.getOperand(1);
+  Value *Op = II.getOperand(2);
+
+  // Convert UXT[BHW] to AND.
+  if (isa<UndefValue>(Passthru) || isAllActivePredicate(Pg)) {
+    auto *Ty = cast<VectorType>(II.getType());
+    auto MaskValue = APInt::getLowBitsSet(Ty->getScalarSizeInBits(), NumBits);
+    auto *Mask = ConstantInt::get(Ty, MaskValue);
+    auto *And = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_and_u, {Ty},
+                                           {Pg, Op, Mask});
+    return IC.replaceInstUsesWith(II, And);
+  }
+
+  return std::nullopt;
+}
+
 std::optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -2745,6 +2765,12 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEInsr(IC, II);
   case Intrinsic::aarch64_sve_ptrue:
     return instCombinePTrue(IC, II);
+  case Intrinsic::aarch64_sve_uxtb:
+    return instCombineSVEUxt(IC, II, 8);
+  case Intrinsic::aarch64_sve_uxth:
+    return instCombineSVEUxt(IC, II, 16);
+  case Intrinsic::aarch64_sve_uxtw:
+    return instCombineSVEUxt(IC, II, 32);
   }
 
   return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
new file mode 100644
index 0000000000000..428691c077773
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Test that we combine uxtb to and_u for all-active predicates.
+
+define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 255))
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+;
+  %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+  ret <vscale x 2 x i64> %3
+}
+
+; Test that we combine uxtb to and_u for undef (``unknown'') passthrough.
+
+define <vscale x 2 x i64> @uxtb_x_64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_x_64(
+; CHECK-SAME: <vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> splat (i64 255))
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+;
+  %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %0, <vscale x 2 x i64> %1)
+  ret <vscale x 2 x i64> %3
+}
+
+; Negative test - ensure we don't combine non-undef, no-all-active predicates.
+
+define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(
+; CHECK-SAME: <vscale x 2 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]], <vscale x 2 x i64> [[TMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
+;
+  %4 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %2, <vscale x 2 x i1> %0, <vscale x 2 x i64> %1)
+  ret <vscale x 2 x i64> %4
+}
+
+; For the remaining uxt* intrinsics and types, test that we combine them to the
+; appropriate and_u variant with a suitable mask.
+
+define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32(
+; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 255))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+;
+  %3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
+  ret <vscale x 4 x i32> %3
+}
+
+define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16(
+; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> splat (i16 255))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP3]]
+;
+  %3 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %1, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
+  ret <vscale x 8 x i16> %3
+}
+
+define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 65535))
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+;
+  %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+  ret <vscale x 2 x i64> %3
+}
+
+define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32(
+; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> splat (i32 65535))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+;
+  %3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
+  ret <vscale x 4 x i32> %3
+}
+
+define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64(
+; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> splat (i64 4294967295))
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
+;
+  %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
+  ret <vscale x 2 x i64> %3
+}
+
+attributes #0 = { "target-features"="+sve" }