Skip to content

Commit 676c641

Browse files
authored
[VectorCombine] Use getInstructionCost to cost Shuffle. (#122068)
This allows it to produce a more accurate cost for the shuffle, using the more accurate calls to getShuffleCost in getInstructionCost. It helps fix some of the regressions from vector combine a little while ago, now that we have better subvector extract costs.
1 parent 0b4fca5 commit 676c641

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,9 +2023,7 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
20232023
if (Match1)
20242024
InnerCost1 = TTI.getInstructionCost(cast<Instruction>(OuterV1), CostKind);
20252025

2026-
InstructionCost OuterCost = TTI.getShuffleCost(
2027-
TargetTransformInfo::SK_PermuteTwoSrc, ShuffleImmTy, OuterMask, CostKind,
2028-
0, nullptr, {OuterV0, OuterV1}, &I);
2026+
InstructionCost OuterCost = TTI.getInstructionCost(&I, CostKind);
20292027

20302028
InstructionCost OldCost = InnerCost0 + InnerCost1 + OuterCost;
20312029

llvm/test/Transforms/PhaseOrdering/AArch64/block_scaling_decompr_8bit.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -416,21 +416,23 @@ define internal noundef <8 x i16> @_ZL24cmplx_mul_combined_re_im11__Int16x8_t20c
416416
; CHECK-NEXT: [[SCALE_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[SCALE_COERCE]] to i16
417417
; CHECK-NEXT: [[SCALE_SROA_2_0_EXTRACT_SHIFT36:%.*]] = lshr i64 [[SCALE_COERCE]], 16
418418
; CHECK-NEXT: [[SCALE_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[SCALE_SROA_2_0_EXTRACT_SHIFT36]] to i16
419+
; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
419420
; CHECK-NEXT: [[VECINIT_I19:%.*]] = insertelement <8 x i16> poison, i16 [[SCALE_SROA_0_0_EXTRACT_TRUNC]], i64 0
420421
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[SCALE_SROA_2_0_EXTRACT_TRUNC]], i64 0
421422
; CHECK-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
422423
; CHECK-NEXT: [[VQNEGQ_V1_I:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> [[VECINIT7_I]])
424+
; CHECK-NEXT: [[VBSL5_I:%.*]] = shufflevector <8 x i16> [[VQNEGQ_V1_I]], <8 x i16> [[VECINIT_I]], <8 x i32> <i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8>
423425
; CHECK-NEXT: [[SHUFFLE_I85:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
424426
; CHECK-NEXT: [[SHUFFLE_I82:%.*]] = shufflevector <8 x i16> [[VECINIT_I19]], <8 x i16> poison, <4 x i32> zeroinitializer
425427
; CHECK-NEXT: [[VQDMULL_V2_I72:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I85]], <4 x i16> [[SHUFFLE_I82]])
426428
; CHECK-NEXT: [[SHUFFLE_I97:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
427429
; CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I97]], <4 x i16> [[SHUFFLE_I82]])
428-
; CHECK-NEXT: [[SHUFFLE_I79:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
429-
; CHECK-NEXT: [[SHUFFLE_I76:%.*]] = shufflevector <8 x i16> [[VQNEGQ_V1_I]], <8 x i16> [[VECINIT_I]], <4 x i32> <i32 0, i32 8, i32 2, i32 8>
430+
; CHECK-NEXT: [[SHUFFLE_I79:%.*]] = shufflevector <8 x i16> [[SHUFFLE_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
431+
; CHECK-NEXT: [[SHUFFLE_I76:%.*]] = shufflevector <8 x i16> [[VBSL5_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
430432
; CHECK-NEXT: [[VQDMLAL2_I106:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I79]], <4 x i16> [[SHUFFLE_I76]])
431433
; CHECK-NEXT: [[VQDMLAL_V3_I107:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMULL_V2_I72]], <4 x i32> [[VQDMLAL2_I106]])
432-
; CHECK-NEXT: [[SHUFFLE_I91:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 5, i32 4, i32 7, i32 6>
433-
; CHECK-NEXT: [[SHUFFLE_I88:%.*]] = shufflevector <8 x i16> [[VQNEGQ_V1_I]], <8 x i16> [[VECINIT_I]], <4 x i32> <i32 4, i32 8, i32 6, i32 8>
434+
; CHECK-NEXT: [[SHUFFLE_I91:%.*]] = shufflevector <8 x i16> [[SHUFFLE_I]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
435+
; CHECK-NEXT: [[SHUFFLE_I88:%.*]] = shufflevector <8 x i16> [[VBSL5_I]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
434436
; CHECK-NEXT: [[VQDMLAL2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I91]], <4 x i16> [[SHUFFLE_I88]])
435437
; CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMULL_V2_I]], <4 x i32> [[VQDMLAL2_I]])
436438
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VQDMLAL_V3_I107]] to <8 x i16>

0 commit comments

Comments
 (0)