diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ca1a486901951..4a03af5d47481 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5207,7 +5207,8 @@ AArch64TTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy, return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, VecTy, CostKind); } -InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) { +InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index, + TTI::TargetCostKind CostKind) { static const CostTblEntry ShuffleTbl[] = { { TTI::SK_Splice, MVT::nxv16i8, 1 }, { TTI::SK_Splice, MVT::nxv8i16, 1 }, @@ -5233,7 +5234,6 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) { std::pair LT = getTypeLegalizationCost(Tp); Type *LegalVTy = EVT(LT.second).getTypeForEVT(Tp->getContext()); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; EVT PromotedVT = LT.second.getScalarType() == MVT::i1 ? TLI->getPromotedVTForPredicate(EVT(LT.second)) : LT.second; @@ -5616,7 +5616,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost( } if (Kind == TTI::SK_Splice && isa(Tp)) - return getSpliceCost(Tp, Index); + return getSpliceCost(Tp, Index, CostKind); // Inserting a subvector can often be done with either a D, S or H register // move, so long as the inserted vector is "aligned". diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index ae0df6b895ec8..372ec22bd548f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -219,7 +219,8 @@ class AArch64TTIImpl : public BasicTTIImplBase { VectorType *ValTy, TTI::TargetCostKind CostKind); - InstructionCost getSpliceCost(VectorType *Tp, int Index); + InstructionCost getSpliceCost(VectorType *Tp, int Index, + TTI::TargetCostKind CostKind); InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 1483d476bef0d..1716f48f4b042 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -638,10 +638,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4bf16 = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8bf16 = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16bf16 = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv16i8_neg = call @llvm.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv32i8_neg = call @llvm.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i16_neg = call @llvm.vector.splice.nxv1i16( zeroinitializer, zeroinitializer, i32 -1) @@ -671,10 +671,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4bf16_neg = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8bf16_neg = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16bf16_neg = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i1_neg = call @llvm.vector.splice.nxv1i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; @@ -702,10 +702,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv4bf16 = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %splice_nxv8bf16 = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %splice_nxv16bf16 = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv16i1 = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv8i1 = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv4i1 = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %splice_nxv2i1 = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv16i8_neg = call @llvm.vector.splice.nxv16i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv32i8_neg = call @llvm.vector.splice.nxv32i8( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i16_neg = call @llvm.vector.splice.nxv1i16( zeroinitializer, zeroinitializer, i32 -1) @@ -735,10 +735,10 @@ define void @vector_splice() #0 { ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv4bf16_neg = call @llvm.vector.splice.nxv4bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %splice_nxv8bf16_neg = call @llvm.vector.splice.nxv8bf16( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16bf16_neg = call @llvm.vector.splice.nxv16bf16( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv16i1_neg = call @llvm.vector.splice.nxv16i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv8i1_neg = call @llvm.vector.splice.nxv8i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv4i1_neg = call @llvm.vector.splice.nxv4i1( zeroinitializer, zeroinitializer, i32 -1) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %splice_nxv2i1_neg = call @llvm.vector.splice.nxv2i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %splice_nxv1i1_neg = call @llvm.vector.splice.nxv1i1( zeroinitializer, zeroinitializer, i32 -1) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ;