Skip to content

Commit 9a99944

Browse files
committed
[SLP]Use source vector type as the original vector type instead of
artificial for better cost estimation. Need to use original source vector type, not the one artificially constructed, based on the number of vectorized scalars. It affect the cost significantly.
1 parent f8ae2e4 commit 9a99944

13 files changed

+657
-293
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7073,7 +7073,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
70737073
/// extracted values from \p VL.
70747074
InstructionCost computeExtractCost(ArrayRef<Value *> VL, ArrayRef<int> Mask,
70757075
TTI::ShuffleKind ShuffleKind) {
7076-
auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
7076+
auto *VecTy = cast<FixedVectorType>(
7077+
cast<ExtractElementInst>(*find_if(VL, [](Value *V) {
7078+
return isa<ExtractElementInst>(V);
7079+
}))->getVectorOperandType());
70777080
unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
70787081

70797082
if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc ||

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,19 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
291291
;
292292
; GFX8-LABEL: @uadd_sat_v4i16(
293293
; GFX8-NEXT: bb:
294-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
295-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
294+
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2
295+
; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
296+
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2
297+
; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
298+
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
299+
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
296300
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
297-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
298-
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
299-
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
300-
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
301-
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
301+
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302+
; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
303+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
304+
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
305+
; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
306+
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
302307
;
303308
bb:
304309
%arg0.0 = extractelement <4 x i16> %arg0, i64 0

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,14 +291,19 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
291291
;
292292
; GFX8-LABEL: @uadd_sat_v4i16(
293293
; GFX8-NEXT: bb:
294-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
295-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
294+
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2
295+
; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
296+
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2
297+
; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
298+
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
299+
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
296300
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
297-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
298-
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
299-
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
300-
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
301-
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
301+
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302+
; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
303+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
304+
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
305+
; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
306+
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
302307
;
303308
bb:
304309
%arg0.0 = extractelement <4 x i16> %arg0, i64 0

llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,15 @@
44
define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) {
55
; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16(
66
; CHECK-NEXT: bb:
7-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> <i32 poison, i32 8>
8-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> <i32 7, i32 8>
9-
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
10-
; CHECK-NEXT: ret <2 x i16> [[TMP2]]
7+
; CHECK-NEXT: [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7
8+
; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8
9+
; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7
10+
; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8
11+
; CHECK-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
12+
; CHECK-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
13+
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_1]], i64 0
14+
; CHECK-NEXT: [[INS_2:%.*]] = insertelement <2 x i16> [[INS_1]], i16 [[ADD_2]], i64 1
15+
; CHECK-NEXT: ret <2 x i16> [[INS_2]]
1116
;
1217
bb:
1318
%arg0.1 = extractelement <9 x i16> undef, i64 7

llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,23 @@
44
define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
55
; CHECK-LABEL: @phis(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
8-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
7+
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
8+
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
9+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
910
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
1011
; CHECK: bb0:
11-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
12-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
12+
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
13+
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
14+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
1315
; CHECK-NEXT: br label [[BB1]]
1416
; CHECK: bb1:
15-
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
16-
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
17-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
18-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
19-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
20-
; CHECK-NEXT: ret <4 x half> [[TMP8]]
17+
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY:%.*]] ], [ [[B2]], [[BB0]] ]
18+
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY]] ], [ [[B3]], [[BB0]] ]
19+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
20+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
21+
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
22+
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
23+
; CHECK-NEXT: ret <4 x half> [[O3]]
2124
;
2225
entry:
2326
%a0 = extractelement <4 x half> %in1, i64 0
@@ -49,20 +52,23 @@ bb1:
4952
define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
5053
; CHECK-LABEL: @phis_reverse(
5154
; CHECK-NEXT: entry:
52-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
53-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
55+
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
56+
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
57+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
5458
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
5559
; CHECK: bb0:
56-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
57-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
60+
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
61+
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
62+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
5863
; CHECK-NEXT: br label [[BB1]]
5964
; CHECK: bb1:
60-
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
61-
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
62-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
63-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
64-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
65-
; CHECK-NEXT: ret <4 x half> [[TMP8]]
65+
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY:%.*]] ], [ [[B3]], [[BB0]] ]
66+
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY]] ], [ [[B2]], [[BB0]] ]
67+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
68+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
69+
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
70+
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
71+
; CHECK-NEXT: ret <4 x half> [[O3]]
6672
;
6773
entry:
6874
%a0 = extractelement <4 x half> %in1, i64 0

0 commit comments

Comments
 (0)