diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index d513e9472a152..12f87226c5f57 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1534,6 +1534,10 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { ArrayRef Mask = Shuffle->getShuffleMask(); int NumSubElts, SubIndex; + // Treat undef/poison mask as free (no matter the length). + if (all_of(Mask, [](int M) { return M < 0; })) + return TTI::TCC_Free; + // TODO: move more of this inside improveShuffleKindFromMask. if (Shuffle->changesLength()) { // Treat a 'subvector widening' as a free shuffle. diff --git a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll index dc770f4ff74c8..777fb5b2f57d4 100644 --- a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -758,3 +758,22 @@ define <32 x i8> @test_v32i8_3(<32 x i8> %a, <32 x i8> %b) { %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %1 } + +; Treat all undef/poison shuffle masks as free. 
+define <2 x i32> @test_v2i32_poison(<2 x i32> %a0, <2 x i32> %a1) {
+; CHECK-LABEL: 'test_v2i32_poison'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s = shufflevector <2 x i32> %a0, <2 x i32> %a1, <2 x i32> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %s
+;
+  %s = shufflevector <2 x i32> %a0, <2 x i32> %a1, <2 x i32> poison ; all-poison mask, same element count as the inputs (2 -> 2); expected cost 0
+  ret <2 x i32> %s
+}
+
+define <4 x float> @test_v4f32_v2f32_poison(<2 x float> %a0, <2 x float> %a1) {
+; CHECK-LABEL: 'test_v4f32_v2f32_poison'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s = shufflevector <2 x float> %a0, <2 x float> %a1, <4 x i32> poison
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %s
+;
+  %s = shufflevector <2 x float> %a0, <2 x float> %a1, <4 x i32> poison ; all-poison mask that also changes the length (2 -> 4 elements); expected cost 0
+  ret <4 x float> %s
+}