11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer -slp-threshold=-12 -pass-remarks-output=%t < %s | FileCheck %s
33; RUN: cat %t | FileCheck -check-prefix=YAML %s
4- ; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=' slp-vectorizer' -slp-threshold=-12 -pass-remarks-output=%t < %s | FileCheck %s
4+ ; RUN: opt -S -mtriple=aarch64--linux-gnu -passes=slp-vectorizer -slp-threshold=-12 -pass-remarks-output=%t < %s | FileCheck %s
55; RUN: cat %t | FileCheck -check-prefix=YAML %s
66
77; These tests check that we remove from consideration pairs of seed
2626; YAML-NEXT: Function: getelementptr_4x32
2727; YAML-NEXT: Args:
2828; YAML-NEXT: - String: 'SLP vectorized with cost '
29- ; YAML-NEXT: - Cost: '8'
29+ ; YAML-NEXT: - Cost: '6'
3030; YAML-NEXT: - String: ' and with tree size '
3131; YAML-NEXT: - TreeSize: '3'
3232
3636; YAML-NEXT: Function: getelementptr_4x32
3737; YAML-NEXT: Args:
3838; YAML-NEXT: - String: 'SLP vectorized with cost '
39- ; YAML-NEXT: - Cost: '10'
39+ ; YAML-NEXT: - Cost: '8'
4040; YAML-NEXT: - String: ' and with tree size '
4141; YAML-NEXT: - TreeSize: '3'
4242
@@ -46,48 +46,44 @@ define i32 @getelementptr_4x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
4646; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
4747; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
4848; CHECK: for.body.preheader:
49- ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[X:%.*]], i64 1
50- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
51- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[Z:%.*]], i64 1
49+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[X:%.*]], i32 1
50+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
51+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[Z:%.*]], i32 1
5252; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5353; CHECK: for.cond.cleanup.loopexit:
54- ; CHECK-NEXT: [[ADD16:%.*]] = extractelement <2 x i32> [[TMP21:%.*]], i64 0
54+ ; CHECK-NEXT: [[ADD16:%.*]] = extractelement <2 x i32> [[TMP17:%.*]], i32 0
5555; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
5656; CHECK: for.cond.cleanup:
5757; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD16]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
5858; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
5959; CHECK: for.body:
60- ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP21]], [[FOR_BODY]] ]
61- ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP7]], i64 1
60+ ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[FOR_BODY]] ]
61+ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
6262; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP15]], 1
63- ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP7]], i64 0
64- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i64 0
63+ ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
64+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i32 0
6565; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <2 x i32> zeroinitializer
6666; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP0]]
67- ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i64 0
68- ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP6]] to i64
69- ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP17]]
67+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
68+ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i32 [[TMP12]]
7069; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
7170; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP16]]
72- ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP5]], i64 1
73- ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
74- ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP9]]
71+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
72+ ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP11]]
7573; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
7674; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
77- ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]]
78- ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP10]], i64 0
79- ; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
80- ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP23]]
75+ ; CHECK-NEXT: [[TMP18:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]]
76+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0
77+ ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP13]]
8178; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
8279; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
83- ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP10]], i64 1
84- ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP24]] to i64
85- ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP18]]
80+ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP18]], i32 1
81+ ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP14]]
8682; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
87- ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ADD11]], i64 0
88- ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[T12]], i64 0
89- ; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP19]], [[TMP20]]
90- ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = extractelement <2 x i32> [[TMP21]], i64 1
83+ ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ADD11]], i32 0
84+ ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[T12]], i32 0
85+ ; CHECK-NEXT: [[TMP17]] = add nsw <2 x i32> [[TMP19]], [[TMP20]]
86+ ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
9187; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]]
9288; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
9389;
@@ -137,7 +133,7 @@ for.body:
137133; YAML: Function: getelementptr_2x32
138134; YAML: Args:
139135; YAML: - String: 'SLP vectorized with cost '
140- ; YAML: - Cost: '10'
136+ ; YAML: - Cost: '8'
141137; YAML-NEXT: - String: ' and with tree size '
142138; YAML-NEXT: - TreeSize: '3'
143139
@@ -147,40 +143,37 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
147143; CHECK-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
148144; CHECK-NEXT: br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
149145; CHECK: for.body.preheader:
150- ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
151- ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[Z:%.*]], i64 1
146+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
147+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[Z:%.*]], i32 1
152148; CHECK-NEXT: br label [[FOR_BODY:%.*]]
153149; CHECK: for.cond.cleanup.loopexit:
154- ; CHECK-NEXT: [[OP_RDX:%.*]] = extractelement <2 x i32> [[TMP18:%.*]], i64 0
150+ ; CHECK-NEXT: [[OP_RDX:%.*]] = extractelement <2 x i32> [[TMP13:%.*]], i32 0
155151; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
156152; CHECK: for.cond.cleanup:
157153; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
158154; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
159155; CHECK: for.body:
160- ; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP18]], [[FOR_BODY]] ]
161- ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i64 1
156+ ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP13]], [[FOR_BODY]] ]
157+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
162158; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP12]], 1
163- ; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[T4]] to i64
164- ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP5]]
159+ ; CHECK-NEXT: [[T5:%.*]] = add nsw i32 [[T4]], 0
160+ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i32 [[T5]]
165161; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
166- ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP11]], i64 0
167- ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP6]]
168- ; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[T4]] to i64
169- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[G]], i64 [[TMP7]]
170- ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[TMP13]], i64 4
162+ ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
163+ ; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP5]]
164+ ; CHECK-NEXT: [[T7:%.*]] = add nsw i32 [[T4]], 1
165+ ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[T7]]
171166; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
172167; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
173- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i64 0
168+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i32 0
174169; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
175170; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP0]]
176- ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i64 0
177- ; CHECK-NEXT: [[TMP8:%.*]] = sext i32 [[TMP4]] to i64
178- ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP8]]
179- ; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4
171+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
172+ ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP9]]
173+ ; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
180174; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
181- ; CHECK-NEXT: [[T11:%.*]] = extractelement <2 x i32> [[TMP3]], i64 1
182- ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[T11]] to i64
183- ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i64 [[TMP9]]
175+ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
176+ ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[G]], i32 [[TMP10]]
184177; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
185178; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]]
186179; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
0 commit comments