diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 97f8569b63529..a2ce9c97bb50e 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5479,6 +5479,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost( VectorType *NTp = VectorType::get(Tp->getScalarType(), LT.second.getVectorElementCount()); InstructionCost Cost; + std::map<std::tuple<unsigned, unsigned, SmallVector<int>>, InstructionCost> + PreviousCosts; for (unsigned N = 0; N < NumVecs; N++) { SmallVector<int> NMask; // Split the existing mask into chunks of size LTNumElts. Track the source @@ -5515,15 +5517,26 @@ InstructionCost AArch64TTIImpl::getShuffleCost( else NMask.push_back(MaskElt % LTNumElts); } + // Check if we have already generated this sub-shuffle, which means we + // will have already generated the output. For example a <16 x i32> splat + // will be the same sub-splat 4 times, which only needs to be generated + // once and reused. + auto Result = + PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0}); + // Check if it was already in the map (already costed). + if (!Result.second) + continue; // If the sub-mask has at most 2 input sub-vectors then re-cost it using // getShuffleCost. If not then cost it using the worst case as the number // of element moves into a new vector. - if (NumSources <= 2) - Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc + InstructionCost NCost = + NumSources <= 2 + ? getShuffleCost(NumSources <= 1 ? 
TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, - NTp, NMask, CostKind, 0, nullptr, Args, CxtI); - else - Cost += LTNumElts; + NTp, NMask, CostKind, 0, nullptr, Args, CxtI) + : LTNumElts; + Result.first->second = NCost; + Cost += NCost; } return Cost; } diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll index 43bd2066ce520..5367344ce573f 100644 --- a/llvm/test/Analysis/CostModel/AArch64/div.ll +++ b/llvm/test/Analysis/CostModel/AArch64/div.ll @@ -123,17 +123,17 @@ define void @sdiv_uniform() { ; CHECK-LABEL: 'sdiv_uniform' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, %V2i64_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, %V4i64_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, %V8i64_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i32_s = shufflevector <2 x i32> poison, <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, %V2i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i32_s = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x 
i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, %V4i32_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, %V8i32_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, %V16i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i16_s = shufflevector <2 x i16> poison, <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, %V2i16_s @@ -141,9 +141,9 @@ define void @sdiv_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, %V4i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i16_s = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, %V8i16_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 
= sdiv <16 x i16> undef, %V16i16_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, %V32i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i8_s = shufflevector <2 x i8> poison, <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, %V2i8_s @@ -153,9 +153,9 @@ define void @sdiv_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, %V8i8_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i8_s = shufflevector <16 x i8> poison, <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, %V16i8_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, %V32i8_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:648 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, %V64i8_s ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret 
void ; @@ -206,17 +206,17 @@ define void @udiv_uniform() { ; CHECK-LABEL: 'udiv_uniform' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = udiv <2 x i64> undef, %V2i64_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = udiv <4 x i64> undef, %V4i64_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = udiv <8 x i64> undef, %V8i64_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i32_s = shufflevector <2 x i32> poison, <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = udiv <2 x i32> undef, %V2i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i32_s = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = udiv <4 x i32> undef, %V4i32_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 
for: %V8i32 = udiv <8 x i32> undef, %V8i32_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = udiv <16 x i32> undef, %V16i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i16_s = shufflevector <2 x i16> poison, <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = udiv <2 x i16> undef, %V2i16_s @@ -224,9 +224,9 @@ define void @udiv_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = udiv <4 x i16> undef, %V4i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i16_s = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = udiv <8 x i16> undef, %V8i16_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = udiv <16 x i16> undef, %V16i16_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = udiv <32 x i16> undef, %V32i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: 
%V2i8_s = shufflevector <2 x i8> poison, <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = udiv <2 x i8> undef, %V2i8_s @@ -236,9 +236,9 @@ define void @udiv_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:88 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = udiv <8 x i8> undef, %V8i8_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i8_s = shufflevector <16 x i8> poison, <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:168 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = udiv <16 x i8> undef, %V16i8_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:328 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = udiv <32 x i8> undef, %V32i8_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:648 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = udiv <64 x i8> undef, %V64i8_s ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll index 1a56a27422e1f..d684e3af00b83 100644 --- a/llvm/test/Analysis/CostModel/AArch64/rem.ll +++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll @@ -123,17 +123,17 @@ define void @srem_uniform() { ; CHECK-LABEL: 'srem_uniform' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost 
Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, %V2i64_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, %V4i64_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, %V8i64_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i32_s = shufflevector <2 x i32> poison, <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, %V2i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i32_s = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, %V4i32_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, %V8i32_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i32_s = 
shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, %V16i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i16_s = shufflevector <2 x i16> poison, <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, %V2i16_s @@ -141,9 +141,9 @@ define void @srem_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, %V4i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i16_s = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, %V8i16_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, %V16i16_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:224 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, %V32i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i8_s = shufflevector <2 x i8> poison, <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, %V2i8_s @@ -153,9 +153,9 @@ define void 
@srem_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, %V8i8_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i8_s = shufflevector <16 x i8> poison, <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, %V16i8_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:224 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, %V32i8_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:448 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, %V64i8_s ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; @@ -206,17 +206,17 @@ define void @urem_uniform() { ; CHECK-LABEL: 'urem_uniform' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i64_s = shufflevector <2 x i64> poison, <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = urem <2 x i64> undef, %V2i64_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i64_s = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = urem <4 x i64> 
undef, %V4i64_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i64_s = shufflevector <8 x i64> poison, <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = urem <8 x i64> undef, %V8i64_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i32_s = shufflevector <2 x i32> poison, <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = urem <2 x i32> undef, %V2i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V4i32_s = shufflevector <4 x i32> poison, <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = urem <4 x i32> undef, %V4i32_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i32_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = urem <8 x i32> undef, %V8i32_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i32_s = shufflevector <16 x i32> poison, <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = urem <16 x i32> undef, %V16i32_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i16_s = shufflevector <2 x i16> poison, <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = urem <2 x i16> undef, %V2i16_s @@ -224,9 +224,9 
@@ define void @urem_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:28 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = urem <4 x i16> undef, %V4i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V8i16_s = shufflevector <8 x i16> poison, <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = urem <8 x i16> undef, %V8i16_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i16_s = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = urem <16 x i16> undef, %V16i16_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i16_s = shufflevector <32 x i16> poison, <32 x i16> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:224 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = urem <32 x i16> undef, %V32i16_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V2i8_s = shufflevector <2 x i8> poison, <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = urem <2 x i8> undef, %V2i8_s @@ -236,9 +236,9 @@ define void @urem_uniform() { ; CHECK-NEXT: Cost Model: Found costs of RThru:56 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = urem <8 x i8> undef, %V8i8_s ; CHECK-NEXT: Cost Model: Found costs of 1 for: %V16i8_s = shufflevector <16 x i8> poison, <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:112 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = urem <16 x i8> undef, %V16i8_s -; CHECK-NEXT: Cost Model: Found costs of 2 for: %V32i8_s = shufflevector <32 x i8> poison, <32 
x i8> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32i8_s = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:224 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = urem <32 x i8> undef, %V32i8_s -; CHECK-NEXT: Cost Model: Found costs of 4 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64i8_s = shufflevector <64 x i8> poison, <64 x i8> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:448 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = urem <64 x i8> undef, %V64i8_s ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll index 6175ea48c5631..2902c7b989047 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-broadcast.ll @@ -10,38 +10,38 @@ define void @broadcast() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1i16 = shufflevector <1 x i16> undef, <1 x i16> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x 
i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1i32 = shufflevector <1 x i32> undef, <1 x i32> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1i64 = shufflevector <1 x i64> undef, <1 x i64> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1i128 = shufflevector <1 x i128> undef, <1 x i128> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2i128 = shufflevector <2 x i128> undef, <2 
x i128> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1f16 = shufflevector <1 x half> undef, <1 x half> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1bf16 = shufflevector <1 x bfloat> undef, <1 x bfloat> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1f32 = shufflevector <1 x float> undef, <1 x float> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 
for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v1f64 = shufflevector <1 x double> undef, <1 x double> undef, <1 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v1i8 = shufflevector <1 x i8> undef, <1 x i8> undef, <1 x i32> zeroinitializer @@ -171,39 +171,39 @@ define void @broadcast_double() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i8 = shufflevector 
<32 x i8> undef, <32 x i8> undef, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %v1i16 = shufflevector <1 x i16> undef, <1 x i16> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %v1i32 = shufflevector <1 x i32> undef, <1 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %v1i64 = shufflevector <1 x i64> undef, <1 x i64> undef, <2 x i32> 
zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %v1i128 = shufflevector <1 x i128> undef, <1 x i128> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 0 for: %v2i128 = shufflevector <2 x i128> undef, <2 x i128> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v1f16 = shufflevector <1 x half> undef, <1 x half> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v1bf16 = shufflevector <1 x bfloat> undef, <1 x bfloat> undef, <2 x i32> zeroinitializer ; 
CHECK-NEXT: Cost Model: Found costs of 1 for: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v1f32 = shufflevector <1 x float> undef, <1 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %v1f64 = shufflevector <1 x double> undef, <1 x double> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> zeroinitializer -; 
CHECK-NEXT: Cost Model: Found costs of 4 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v1i8 = shufflevector <1 x i8> undef, <1 x i8> undef, <2 x i32> zeroinitializer diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll index 068fffb68c85e..dc80267360ea6 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-load.ll @@ -25,17 +25,17 @@ define void @shuffle() { ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv8i16 = load <8 x i16>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv8i16 = shufflevector <8 x i16> %lv8i16, <8 x i16> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %lv16i16 = load <16 x i16>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:0 Lat:2 SizeLat:2 for: %sv16i16 = shufflevector <16 x i16> %lv16i16, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv16i16 = shufflevector <16 x i16> %lv16i16, <16 x i16> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv2i32 = load <2 x i32>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv2i32 = shufflevector <2 x i32> %lv2i32, <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: 
Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv4i32 = load <4 x i32>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv4i32 = shufflevector <4 x i32> %lv4i32, <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %lv8i32 = load <8 x i32>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:0 Lat:2 SizeLat:2 for: %sv8i32 = shufflevector <8 x i32> %lv8i32, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv8i32 = shufflevector <8 x i32> %lv8i32, <8 x i32> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv2i64 = load <2 x i64>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv2i64 = shufflevector <2 x i64> %lv2i64, <2 x i64> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %lv4i64 = load <4 x i64>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:0 Lat:2 SizeLat:2 for: %sv4i64 = shufflevector <4 x i64> %lv4i64, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv4i64 = shufflevector <4 x i64> %lv4i64, <4 x i64> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv2f16 = load <2 x half>, ptr undef, align 4 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv2f16 = shufflevector <2 x half> %lv2f16, <2 x half> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv4f16 = load <4 x half>, ptr undef, align 8 @@ -43,17 +43,17 @@ define void @shuffle() { ; CHECK-NEXT: Cost Model: Found costs of 
RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv8f16 = load <8 x half>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv8f16 = shufflevector <8 x half> %lv8f16, <8 x half> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %lv16f16 = load <16 x half>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:0 Lat:2 SizeLat:2 for: %sv16f16 = shufflevector <16 x half> %lv16f16, <16 x half> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv16f16 = shufflevector <16 x half> %lv16f16, <16 x half> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv2f32 = load <2 x float>, ptr undef, align 8 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv2f32 = shufflevector <2 x float> %lv2f32, <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv4f32 = load <4 x float>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv4f32 = shufflevector <4 x float> %lv4f32, <4 x float> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %lv8f32 = load <8 x float>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:0 Lat:2 SizeLat:2 for: %sv8f32 = shufflevector <8 x float> %lv8f32, <8 x float> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv8f32 = shufflevector <8 x float> %lv8f32, <8 x float> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:4 SizeLat:1 for: %lv2f64 = load <2 x double>, ptr undef, align 16 ; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 
Lat:1 SizeLat:1 for: %sv2f64 = shufflevector <2 x double> %lv2f64, <2 x double> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:4 SizeLat:2 for: %lv4f64 = load <4 x double>, ptr undef, align 32 -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:0 Lat:2 SizeLat:2 for: %sv4f64 = shufflevector <4 x double> %lv4f64, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:0 Lat:1 SizeLat:1 for: %sv4f64 = shufflevector <4 x double> %lv4f64, <4 x double> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %lv2i8 = load <2 x i8>, ptr undef diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll index d152ef1caa672..8d68781d0c1b7 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -12,7 +12,7 @@ define void @shuffle() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 2 for: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 8 for: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found costs of 16 for: %v10b = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 8 for: %v10b = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found costs of 2 for: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> @@ -365,7 +365,7 @@ define void 
@multipart() { ; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16c = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16d = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32a = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v32a4 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32a4 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v32idrev = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32many = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32many2 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll index 00030daaaa97b..96dc57936c65b 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll @@ -239,39 +239,39 @@ define void @splatstore(ptr %p) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: store <8 x i8> %v8i8, ptr %p, align 8 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: store <16 x i8> %v16i8, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:1 SizeLat:2 for: store <32 x i8> %v32i8, ptr %p, align 
32 -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:1 SizeLat:4 for: store <64 x i8> %v64i8, ptr %p, align 64 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: store <4 x i16> %v4i16, ptr %p, align 8 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: store <8 x i16> %v8i16, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:1 SizeLat:2 for: store <16 x i16> %v16i16, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:1 SizeLat:4 for: store <32 x i16> %v32i16, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found costs of 8 for: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:1 SizeLat:8 for: store <64 x i16> 
%v64i16, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: store <4 x i32> %v4i32, ptr %p, align 16 -; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:1 SizeLat:2 for: store <8 x i32> %v8i32, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:1 SizeLat:4 for: store <16 x i32> %v16i32, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found costs of 8 for: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:1 SizeLat:8 for: store <32 x i32> %v32i32, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found costs of 16 for: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:1 SizeLat:16 for: store <64 x i32> %v64i32, ptr %p, align 256 ; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 
RThru:2 CodeSize:2 Lat:1 SizeLat:2 for: store <4 x i64> %v4i64, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found costs of 4 for: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:4 Lat:1 SizeLat:4 for: store <8 x i64> %v8i64, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found costs of 8 for: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:8 Lat:1 SizeLat:8 for: store <16 x i64> %v16i64, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found costs of 16 for: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:16 Lat:1 SizeLat:16 for: store <32 x i64> %v32i64, ptr %p, align 256 -; CHECK-NEXT: Cost Model: Found costs of 32 for: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found costs of 1 for: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:32 Lat:1 SizeLat:32 for: store <64 x i64> %v64i64, ptr %p, align 512 ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ;