Skip to content

Commit 90bc448

Browse files
committed
[VectorCombine][X86] foldShuffleOfIntrinsics - provide the arguments to a getShuffleCost call
Ensure the arguments are passed to the getShuffleCost calls to improve cost analysis, in particular if these are constant the costs will be recognised as free Noticed while reviewing llvm#170052
1 parent d68f543 commit 90bc448

File tree

2 files changed

+20
-20
lines changed

2 files changed

+20
-20
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2924,8 +2924,9 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
29242924
auto *ArgTy = FixedVectorType::get(VecTy->getElementType(),
29252925
ShuffleDstTy->getNumElements());
29262926
NewArgsTy.push_back(ArgTy);
2927-
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
2928-
ArgTy, VecTy, OldMask, CostKind);
2927+
NewCost += TTI.getShuffleCost(
2928+
TargetTransformInfo::SK_PermuteTwoSrc, ArgTy, VecTy, OldMask,
2929+
CostKind, 0, nullptr, {II0->getArgOperand(I), II1->getArgOperand(I)});
29292930
}
29302931
}
29312932
IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);

llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,11 @@ define <4 x float> @shuffle_fma_const_chain(<4 x float> %a0) {
1515
}
1616

1717
define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
18-
; SSE-LABEL: define <8 x float> @concat_fma_const_chain(
19-
; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
20-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21-
; SSE-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
22-
; SSE-NEXT: ret <8 x float> [[RES]]
23-
;
24-
; AVX-LABEL: define <8 x float> @concat_fma_const_chain(
25-
; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
26-
; AVX-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
27-
; AVX-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
28-
; AVX-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
29-
; AVX-NEXT: ret <8 x float> [[RES]]
18+
; CHECK-LABEL: define <8 x float> @concat_fma_const_chain(
19+
; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
20+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21+
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
22+
; CHECK-NEXT: ret <8 x float> [[RES]]
3023
;
3124
%l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
3225
%h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
@@ -35,12 +28,18 @@ define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
3528
}
3629

3730
define <8 x float> @interleave_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
38-
; CHECK-LABEL: define <8 x float> @interleave_fma_const_chain(
39-
; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
40-
; CHECK-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
41-
; CHECK-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
42-
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
43-
; CHECK-NEXT: ret <8 x float> [[RES]]
31+
; SSE-LABEL: define <8 x float> @interleave_fma_const_chain(
32+
; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
33+
; SSE-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
34+
; SSE-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
35+
; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
36+
; SSE-NEXT: ret <8 x float> [[RES]]
37+
;
38+
; AVX-LABEL: define <8 x float> @interleave_fma_const_chain(
39+
; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
40+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
41+
; AVX-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
42+
; AVX-NEXT: ret <8 x float> [[RES]]
4443
;
4544
%l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
4645
%h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))

0 commit comments

Comments
 (0)