diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 12e08c09ea67d..e01dafd36d30d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1399,9 +1399,8 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
 
 /// If all arguments of the intrinsic are unary shuffles with the same mask,
 /// try to shuffle after the intrinsic.
-static Instruction *
-foldShuffledIntrinsicOperands(IntrinsicInst *II,
-                              InstCombiner::BuilderTy &Builder) {
+Instruction *
+InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
   // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
   //       etc. Use llvm::isTriviallyVectorizable() and related to determine
   //       which intrinsics are safe to shuffle?
@@ -1419,9 +1418,11 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   }
 
   Value *X;
+  Constant *C;
   ArrayRef<int> Mask;
-  if (!match(II->getArgOperand(0),
-             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+  auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
+  if (!NonConstArg ||
+      !match(NonConstArg, m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
     return nullptr;
 
   // At least 1 operand must have 1 use because we are creating 2 instructions.
@@ -1429,15 +1430,21 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
     return nullptr;
 
   // See if all arguments are shuffled with the same mask.
-  SmallVector<Value *, 4> NewArgs(II->arg_size());
-  NewArgs[0] = X;
+  SmallVector<Value *, 4> NewArgs;
   Type *SrcTy = X->getType();
-  for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
-    if (!match(II->getArgOperand(i),
-               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
-        X->getType() != SrcTy)
+  for (Value *Arg : II->args()) {
+    if (match(Arg, m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
+        X->getType() == SrcTy)
+      NewArgs.push_back(X);
+    else if (match(Arg, m_ImmConstant(C))) {
+      // If it's a constant, try to find the constant that would be shuffled to C.
+      if (Constant *ShuffledC =
+              unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
+        NewArgs.push_back(ShuffledC);
+      else
+        return nullptr;
+    } else
       return nullptr;
-    NewArgs[i] = X;
   }
 
   // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
@@ -3849,7 +3856,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         if (Instruction *R = FoldOpIntoSelect(*II, Sel))
           return R;
 
-  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
 
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
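The effect of the change above, sketched as IR: a unary shuffle operand can now be pulled out of the intrinsic even when the remaining operands are immediate constants, provided a constant exists that the mask would shuffle into the one appearing in the call (that pre-image is computed by unshuffleConstant, which moves onto InstCombinerImpl further down). A rough before/after with hypothetical function names and constants, in the spirit of the new smax_unary_shuffle_ops_lhs_const test below; the shuffle has a single use, so the one-use requirement in the code above is met:

; Before instcombine (illustrative only, not one of the patch's tests):
define <3 x i8> @smax_shuffled_op_with_const(<3 x i8> %x) {
  %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
  %r = call <3 x i8> @llvm.smax(<3 x i8> %sx, <3 x i8> <i8 1, i8 2, i8 3>)
  ret <3 x i8> %r
}

; Expected to fold to roughly this shape: the constant <i8 2, i8 1, i8 3> is the
; pre-image of <i8 1, i8 2, i8 3> under the mask <1, 0, 2>, so shuffling the new
; call by the same mask preserves the original result.
define <3 x i8> @smax_shuffled_op_with_const_folded(<3 x i8> %x) {
  %m = call <3 x i8> @llvm.smax(<3 x i8> %x, <3 x i8> <i8 2, i8 1, i8 3>)
  %r = shufflevector <3 x i8> %m, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
  ret <3 x i8> %r
}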
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8b657b3f8555c..5e0cd17fb1924 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -147,6 +147,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
   Instruction *foldItoFPtoI(CastInst &FI);
   Instruction *visitSelectInst(SelectInst &SI);
+  Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
   Instruction *visitCallBrInst(CallBrInst &CBI);
@@ -604,6 +605,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldVectorBinop(BinaryOperator &Inst);
   Instruction *foldVectorSelect(SelectInst &Sel);
   Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
+  Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                              VectorType *NewCTy);
 
   /// Given a binary operator, cast instruction, or select which has a PHI node
   /// as operand #0, see if we can fold the instruction into the PHI (which is
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9ddcef0396e39..3dc89772676df 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2100,8 +2100,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
 ///
 /// A 1-to-1 mapping is not required. Example:
 /// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
-static Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
-                                   VectorType *NewCTy) {
+Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                                              VectorType *NewCTy) {
   if (isa<ScalableVectorType>(NewCTy)) {
     Constant *Splat = C->getSplatValue();
     if (!Splat)
       return nullptr;
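The doc comment's example can be sanity-checked directly in IR: shuffling the reconstructed constant NewC by ShMask must give back C, so the shuffle below constant-folds to <i8 5, i8 5, i8 6, i8 6>. This is only an illustration of the property unshuffleConstant relies on (with a hypothetical function name), not a test from this patch:

define <4 x i8> @unshuffle_constant_roundtrip() {
  ; Every result lane reads NewC[1] = 5 or NewC[2] = 6, so this constant-folds
  ; back to the original C = <i8 5, i8 5, i8 6, i8 6>.
  %c = shufflevector <4 x i8> <i8 poison, i8 5, i8 6, i8 poison>, <4 x i8> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  ret <4 x i8> %c
}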
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index ae0067d41426c..f0d4f776a5d90 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -802,6 +802,67 @@ define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float>
   ret <2 x float> %r
 }
 
+define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> , <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <2 x i32>
+; CHECK-NEXT:    ret <2 x float> [[B]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32>
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32>
+  %r = call <2 x float> @llvm.fma(<2 x float> %a, <2 x float> , <2 x float> %b)
+  ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> , <2 x float> , <2 x float> [[X1:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32>
+; CHECK-NEXT:    ret <2 x float> [[A]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32>
+  %r = call <2 x float> @llvm.fma(<2 x float> , <2 x float> , <2 x float> %a)
+  ret <2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2 x float> %x, <vscale x 2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B:%.*]])
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <vscale x 2 x float> [[R]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R1]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> %a, <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %b)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[A]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %r
+}
+
+define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32>
+; CHECK-NEXT:    call void @use_vec3(<3 x float> [[A]])
+; CHECK-NEXT:    [[Y:%.*]] = call fast <2 x float> @llvm.fma.v2f32(<2 x float> [[X]], <2 x float> splat (float 4.200000e+01), <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <3 x i32>
+; CHECK-NEXT:    ret <3 x float> [[B]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32>
+  call void @use_vec3(<3 x float> %a)
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <3 x i32>
+  %r = call fast <3 x float> @llvm.fma(<3 x float> %a, <3 x float> splat (float 42.0), <3 x float> %b)
+  ret <3 x float> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x float> @fma_unary_shuffle_ops_unshuffled(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 862853f992968..0325c60997dfd 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -930,6 +930,67 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
   ret <2 x i31> %r
 }
 
+define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> , <2 x i32> [[X:%.*]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32>
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> , <2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %r
+}
+
+define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> , <2 x i32> , <2 x i32> [[X1:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> poison, <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> , <2 x i32> , <2 x i32> %a)
+  ret <2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[B]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1:%.*]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[A]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a)
+  ret <vscale x 2 x i32> %r
+}
+
+define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32>
+; CHECK-NEXT:    call void @use_v3(<3 x i32> [[A]])
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> splat (i32 42), <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <3 x i32>
+; CHECK-NEXT:    ret <3 x i32> [[B]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <3 x i32>
+  call void @use_v3(<3 x i32> %a)
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <3 x i32>
+  %r = call <3 x i32> @llvm.fshr(<3 x i32> splat (i32 42), <3 x i32> %a, <3 x i32> %b)
+  ret <3 x i32> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x i32> @fsh_unary_shuffle_ops_unshuffled(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 9a8608da9fd5b..38930956eda2f 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2416,6 +2416,39 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
   ret <3 x i8> %r
 }
 
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const(
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X1:%.*]], <3 x i8> )
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X]], <3 x i8> poison, <3 x i32>
+; CHECK-NEXT:    ret <3 x i8> [[SX]]
+;
+  %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32>
+  %r = call <3 x i8> @llvm.smax(<3 x i8> %sx, <3 x i8> )
+  ret <3 x i8> %r
+}
+
+define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_scalable(
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX:%.*]], <vscale x 3 x i8> splat (i8 42))
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <vscale x 3 x i8> [[R]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 3 x i8> [[R1]]
+;
+  %sx = shufflevector <vscale x 3 x i8> %x, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> %sx, <vscale x 3 x i8> splat (i8 42))
+  ret <vscale x 3 x i8> %r
+}
+
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const_widening(<2 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_widening(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X1:%.*]], <2 x i8> )
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> poison, <3 x i32>
+; CHECK-NEXT:    ret <3 x i8> [[SX]]
+;
+  %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32>
+  %r = call <3 x i8> @llvm.smax(<3 x i8> %sx, <3 x i8> )
+  ret <3 x i8> %r
+}
+
 ; negative test - must have 2 shuffles
 
 define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y) {