Skip to content

Commit c2472be

Browse files
authored
[VectorCombine][X86] foldShuffleOfIntrinsics - provide the arguments to a getShuffleCost call (#170465)
Ensure the arguments are passed to the getShuffleCost calls to improve cost analysis; in particular, if these are constant, the costs will be recognised as free. Noticed while reviewing #170052.
1 parent 907c94b commit c2472be

File tree

2 files changed

+20
-20
lines changed

2 files changed

+20
-20
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2924,8 +2924,9 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
29242924
auto *ArgTy = FixedVectorType::get(VecTy->getElementType(),
29252925
ShuffleDstTy->getNumElements());
29262926
NewArgsTy.push_back(ArgTy);
2927-
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
2928-
ArgTy, VecTy, OldMask, CostKind);
2927+
NewCost += TTI.getShuffleCost(
2928+
TargetTransformInfo::SK_PermuteTwoSrc, ArgTy, VecTy, OldMask,
2929+
CostKind, 0, nullptr, {II0->getArgOperand(I), II1->getArgOperand(I)});
29292930
}
29302931
}
29312932
IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);

llvm/test/Transforms/VectorCombine/X86/shuffle-of-fma-const.ll

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -15,18 +15,11 @@ define <4 x float> @shuffle_fma_const_chain(<4 x float> %a0) {
1515
}
1616

1717
define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
18-
; SSE-LABEL: define <8 x float> @concat_fma_const_chain(
19-
; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
20-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21-
; SSE-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
22-
; SSE-NEXT: ret <8 x float> [[RES]]
23-
;
24-
; AVX-LABEL: define <8 x float> @concat_fma_const_chain(
25-
; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
26-
; AVX-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
27-
; AVX-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
28-
; AVX-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
29-
; AVX-NEXT: ret <8 x float> [[RES]]
18+
; CHECK-LABEL: define <8 x float> @concat_fma_const_chain(
19+
; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
20+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
21+
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
22+
; CHECK-NEXT: ret <8 x float> [[RES]]
3023
;
3124
%l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
3225
%h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
@@ -35,12 +28,18 @@ define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
3528
}
3629

3730
define <8 x float> @interleave_fma_const_chain(<4 x float> %a0, <4 x float> %a1) {
38-
; CHECK-LABEL: define <8 x float> @interleave_fma_const_chain(
39-
; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
40-
; CHECK-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
41-
; CHECK-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
42-
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
43-
; CHECK-NEXT: ret <8 x float> [[RES]]
31+
; SSE-LABEL: define <8 x float> @interleave_fma_const_chain(
32+
; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
33+
; SSE-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
34+
; SSE-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
35+
; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
36+
; SSE-NEXT: ret <8 x float> [[RES]]
37+
;
38+
; AVX-LABEL: define <8 x float> @interleave_fma_const_chain(
39+
; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
40+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
41+
; AVX-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000))
42+
; AVX-NEXT: ret <8 x float> [[RES]]
4443
;
4544
%l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))
4645
%h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000))

0 commit comments

Comments
 (0)