Skip to content

Commit bcf6ebf

Browse files
alexey-bataev authored and bcahoon committed
[SLP]Check scalars before trying scheduling.
Check whether the scalars can be vectorized before trying to schedule them. This may save compile time and improve vectorization on large functions/basic blocks.

Differential Revision: https://reviews.llvm.org/D154891

This patch is cherry-picked to address SWDEV-434847.

Change-Id: I1a7e41a35050232a9c6703beb32079a3cdc2b0df
1 parent c9906a6 commit bcf6ebf

15 files changed

+1018
-958
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 361 additions & 304 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll

Lines changed: 176 additions & 173 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -828,10 +828,10 @@ declare i32 @llvm.abs.i32(i32, i1)
828828

829829
define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) {
830830
; CHECK-LABEL: @stride_sum_abs_diff(
831-
; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[STRIDE:%.*]]
832-
; CHECK-NEXT: [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 [[STRIDE]]
833-
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P]], align 4
834-
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[Q]], align 4
831+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 4
832+
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[Q:%.*]], align 4
833+
; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[STRIDE:%.*]]
834+
; CHECK-NEXT: [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[STRIDE]]
835835
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[P_2]], align 4
836836
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[Q_2]], align 4
837837
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>

llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@ define void @bcast_vals2(ptr %A, ptr %B, ptr %C, ptr %D, ptr %E, ptr %S) {
6060
; CHECK-LABEL: @bcast_vals2(
6161
; CHECK-NEXT: entry:
6262
; CHECK-NEXT: [[A0:%.*]] = load i16, ptr [[A:%.*]], align 8
63-
; CHECK-NEXT: [[V1:%.*]] = sext i16 [[A0]] to i32
6463
; CHECK-NEXT: [[B0:%.*]] = load i16, ptr [[B:%.*]], align 8
6564
; CHECK-NEXT: [[C0:%.*]] = load i16, ptr [[C:%.*]], align 8
6665
; CHECK-NEXT: [[D0:%.*]] = load i16, ptr [[D:%.*]], align 8
6766
; CHECK-NEXT: [[E0:%.*]] = load i16, ptr [[E:%.*]], align 8
67+
; CHECK-NEXT: [[V1:%.*]] = sext i16 [[A0]] to i32
6868
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[B0]], i32 0
6969
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> [[TMP0]], i16 [[C0]], i32 1
7070
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[E0]], i32 2

llvm/test/Transforms/SLPVectorizer/X86/buildvector-with-reuses.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
define <4 x double> @test(ptr %ia, ptr %ib, ptr %ic, ptr %id, ptr %ie, ptr %x) {
55
; CHECK-LABEL: define <4 x double> @test
66
; CHECK-SAME: (ptr [[IA:%.*]], ptr [[IB:%.*]], ptr [[IC:%.*]], ptr [[ID:%.*]], ptr [[IE:%.*]], ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
7-
; CHECK-NEXT: [[I4275:%.*]] = load double, ptr [[ID]], align 8
8-
; CHECK-NEXT: [[I4326:%.*]] = load <4 x double>, ptr [[X]], align 8
97
; CHECK-NEXT: [[I4238:%.*]] = load double, ptr [[IA]], align 8
108
; CHECK-NEXT: [[I4252:%.*]] = load double, ptr [[IB]], align 8
119
; CHECK-NEXT: [[I4264:%.*]] = load double, ptr [[IC]], align 8
10+
; CHECK-NEXT: [[I4275:%.*]] = load double, ptr [[ID]], align 8
1211
; CHECK-NEXT: [[I4277:%.*]] = load double, ptr [[IE]], align 8
12+
; CHECK-NEXT: [[I4326:%.*]] = load <4 x double>, ptr [[X]], align 8
1313
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[I4326]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
1414
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[I4275]], i32 1
1515
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>

llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ for.end48: ; preds = %for.end44
9393
define void @zot(ptr %arg) {
9494
; CHECK-LABEL: @zot(
9595
; CHECK-NEXT: bb:
96-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], ptr [[ARG:%.*]], i64 0, i32 1
9796
; CHECK-NEXT: [[TMP:%.*]] = load double, ptr undef, align 8
9897
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr undef, align 8
98+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], ptr [[ARG:%.*]], i64 0, i32 1
9999
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
100100
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1
101101
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef

llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,10 @@ define void @lookahead_external_uses(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S, ptr
207207
; CHECK-NEXT: [[B0:%.*]] = load double, ptr [[B]], align 8
208208
; CHECK-NEXT: [[C0:%.*]] = load double, ptr [[C:%.*]], align 8
209209
; CHECK-NEXT: [[D0:%.*]] = load double, ptr [[D:%.*]], align 8
210-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8
211210
; CHECK-NEXT: [[B2:%.*]] = load double, ptr [[IDXB2]], align 8
212211
; CHECK-NEXT: [[A2:%.*]] = load double, ptr [[IDXA2]], align 8
213212
; CHECK-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
213+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8
214214
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
215215
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B2]], i32 1
216216
; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP2]]
@@ -285,10 +285,10 @@ define void @lookahead_limit_users_budget(ptr %A, ptr %B, ptr %C, ptr %D, ptr %S
285285
; CHECK-NEXT: [[B0:%.*]] = load double, ptr [[B]], align 8
286286
; CHECK-NEXT: [[C0:%.*]] = load double, ptr [[C:%.*]], align 8
287287
; CHECK-NEXT: [[D0:%.*]] = load double, ptr [[D:%.*]], align 8
288-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8
289288
; CHECK-NEXT: [[B2:%.*]] = load double, ptr [[IDXB2]], align 8
290289
; CHECK-NEXT: [[A2:%.*]] = load double, ptr [[IDXA2]], align 8
291290
; CHECK-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
291+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A]], align 8
292292
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
293293
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B2]], i32 1
294294
; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP0]], [[TMP2]]

llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ target triple = "i386-apple-macosx10.9.0"
99
define void @test(ptr %i1, ptr %i2, ptr %o) {
1010
; CHECK-LABEL: @test(
1111
; CHECK-NEXT: entry:
12-
; CHECK-NEXT: [[I1_GEP1:%.*]] = getelementptr double, ptr [[I1:%.*]], i64 1
13-
; CHECK-NEXT: [[I1_0:%.*]] = load double, ptr [[I1]], align 16
12+
; CHECK-NEXT: [[I1_0:%.*]] = load double, ptr [[I1:%.*]], align 16
13+
; CHECK-NEXT: [[I1_GEP1:%.*]] = getelementptr double, ptr [[I1]], i64 1
1414
; CHECK-NEXT: [[I1_1:%.*]] = load double, ptr [[I1_GEP1]], align 16
1515
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I1_0]], i32 0
1616
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[I1_1]], i32 1
1717
; CHECK-NEXT: br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
1818
; CHECK: then:
19-
; CHECK-NEXT: [[I2_GEP1:%.*]] = getelementptr inbounds double, ptr [[I2:%.*]], i64 1
20-
; CHECK-NEXT: [[I2_0:%.*]] = load double, ptr [[I2]], align 16
19+
; CHECK-NEXT: [[I2_0:%.*]] = load double, ptr [[I2:%.*]], align 16
20+
; CHECK-NEXT: [[I2_GEP1:%.*]] = getelementptr inbounds double, ptr [[I2]], i64 1
2121
; CHECK-NEXT: [[I2_1:%.*]] = load double, ptr [[I2_GEP1]], align 16
2222
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[I2_0]], i32 0
2323
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[I2_1]], i32 1

0 commit comments

Comments
 (0)