diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 2ab652ca258c6..ec4dc787747ca 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2193,6 +2193,13 @@ m_TruncOrSelf(const OpTy &Op) {
   return m_CombineOr(m_Trunc(Op), Op);
 }
 
+template <typename OpTy>
+inline match_combine_or<NoWrapTrunc_match<OpTy, TruncInst::NoUnsignedWrap>,
+                        OpTy>
+m_NUWTruncOrSelf(const OpTy &Op) {
+  return m_CombineOr(m_NUWTrunc(Op), Op);
+}
+
 /// Matches SExt.
 template <typename OpTy>
 inline CastInst_match<OpTy, SExtInst> m_SExt(const OpTy &Op) {
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 23a4d1b5c615e..b0376c4803f20 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -613,8 +613,12 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
   if (EC.isScalable()) {
     // Compare vscale patterns
     uint64_t VScaleFactor;
-    if (match(VLParam, m_Mul(m_VScale(), m_ConstantInt(VScaleFactor))))
+    if (match(VLParam, m_NUWTruncOrSelf(
+                           m_NUWMul(m_VScale(), m_ConstantInt(VScaleFactor)))))
       return VScaleFactor >= EC.getKnownMinValue();
+    if (match(VLParam, m_NUWTruncOrSelf(
+                           m_NUWShl(m_VScale(), m_ConstantInt(VScaleFactor)))))
+      return 1 << VScaleFactor >= EC.getKnownMinValue();
     return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale());
   }
 
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
new file mode 100644
index 0000000000000..6ba5a589def7f
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp-convert-evl.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=pre-isel-intrinsic-lowering -expandvp-override-evl-transform=Convert -expandvp-override-mask-transform=Legal -S < %s | FileCheck %s
+
+define <vscale x 2 x i64> @unknown_evl(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %evl) {
+; CHECK-LABEL: define <vscale x 2 x i64> @unknown_evl(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]], i32 [[EVL:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 2
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+;
+  %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %add
+}
+
+define <2 x i64> @exact_evl_fixed(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m) {
+; CHECK-LABEL: define <2 x i64> @exact_evl_fixed(
+; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i1> [[M:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> [[M]], i32 2)
+; CHECK-NEXT:    ret <2 x i64> [[ADD]]
+;
+  %add = call <2 x i64> @llvm.vp.add(<2 x i64> poison, <2 x i64> poison, <2 x i1> %m, i32 2)
+  ret <2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_mul(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_mul(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[EVL:%.*]] = mul nuw i32 [[VSCALE]], 2
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+;
+  %vscale = call i32 @llvm.vscale()
+  %evl = mul nuw i32 %vscale, 2
+  %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_shl(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_shl(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[EVL:%.*]] = shl nuw i32 [[VSCALE]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+;
+  %vscale = call i32 @llvm.vscale()
+  %evl = shl nuw i32 %vscale, 1
+  %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_mul_trunc(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_mul_trunc(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHL:%.*]] = mul nuw i64 [[VSCALE]], 2
+; CHECK-NEXT:    [[EVL:%.*]] = trunc nuw i64 [[SHL]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+;
+  %vscale = call i64 @llvm.vscale()
+  %shl = mul nuw i64 %vscale, 2
+  %evl = trunc nuw i64 %shl to i32
+  %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %add
+}
+
+
+define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i64 [[VSCALE]], 1
+; CHECK-NEXT:    [[EVL:%.*]] = trunc nuw i64 [[SHL]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[M]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+;
+  %vscale = call i64 @llvm.vscale()
+  %shl = shl nuw i64 %vscale, 1
+  %evl = trunc nuw i64 %shl to i32
+  %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %add
+}
+
+define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc_no_nuw(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: define <vscale x 2 x i64> @exact_evl_vscale_shl_trunc_no_nuw(
+; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]], <vscale x 2 x i64> [[Y:%.*]], <vscale x 2 x i1> [[M:%.*]]) {
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i64 [[VSCALE]], 1
+; CHECK-NEXT:    [[EVL:%.*]] = trunc i64 [[SHL]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M]]
+; CHECK-NEXT:    [[VSCALE1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE1]], 2
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> [[TMP2]], i32 [[SCALABLE_SIZE]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+;
+  %vscale = call i64 @llvm.vscale()
+  %shl = shl nuw i64 %vscale, 1
+  %evl = trunc i64 %shl to i32
+  %add = call <vscale x 2 x i64> @llvm.vp.add(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i64> %add
+}
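
The IntrinsicInst.cpp hunk lets VPIntrinsic::canIgnoreVectorLengthParam() look through a `trunc nuw` of a `mul nuw`/`shl nuw` of vscale: for the shl form the EVL is vscale << VScaleFactor lanes, so the VL operand is redundant whenever (1 << VScaleFactor) >= EC.getKnownMinValue(). Below is a minimal, hypothetical standalone sketch (not part of the patch) of how that query could be exercised on IR equivalent to the exact_evl_vscale_shl_trunc test; the driver name and function @f are made up for illustration, and it assumes a build of LLVM that already contains this change, linked against LLVMCore, LLVMAsmParser and LLVMSupport.

// evl-demo.cpp: hypothetical driver, not part of the patch.
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

using namespace llvm;

// EVL is "trunc nuw (shl nuw (vscale, 1))", i.e. vscale * 2, which is exactly
// the lane count of <vscale x 2 x i64>, so the VL parameter can be ignored.
static const char *IR = R"(
  declare i64 @llvm.vscale.i64()
  declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)

  define <vscale x 2 x i64> @f(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m) {
    %vscale = call i64 @llvm.vscale.i64()
    %shl = shl nuw i64 %vscale, 1
    %evl = trunc nuw i64 %shl to i32
    %add = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i1> %m, i32 %evl)
    ret <vscale x 2 x i64> %add
  }
)";

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
  if (!M) {
    Err.print("evl-demo", errs());
    return 1;
  }
  // Find the VP intrinsic call and ask whether its EVL covers all lanes.
  for (Instruction &I : M->getFunction("f")->getEntryBlock())
    if (auto *VPI = dyn_cast<VPIntrinsic>(&I))
      outs() << "canIgnoreVectorLengthParam: "
             << (VPI->canIgnoreVectorLengthParam() ? "true" : "false") << "\n";
  return 0;
}

With this patch the sketch would be expected to print "true"; dropping the nuw from the trunc (as in the exact_evl_vscale_shl_trunc_no_nuw test) makes the match fail, and the expansion pass then folds the EVL into the mask instead.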