@@ -24,23 +24,23 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
2424; SSE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
2525; SSE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
2626; SSE-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
27- ; SSE-NEXT: [[TMP7 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
28- ; SSE-NEXT: [[TMP8 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
29- ; SSE-NEXT: [[TMP9 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
30- ; SSE-NEXT: store <4 x i32> [[TMP7 ]], ptr [[TMP9 ]], align 4
31- ; SSE-NEXT: [[TMP11 :%.*]] = getelementptr inbounds i32, ptr [[TMP9 ]], i64 4
32- ; SSE-NEXT: store <4 x i32> [[TMP8 ]], ptr [[TMP11 ]], align 4
27+ ; SSE-NEXT: [[TMP5 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
28+ ; SSE-NEXT: [[TMP6 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
29+ ; SSE-NEXT: [[TMP7 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
30+ ; SSE-NEXT: store <4 x i32> [[TMP5 ]], ptr [[TMP7 ]], align 4
31+ ; SSE-NEXT: [[TMP8 :%.*]] = getelementptr inbounds i32, ptr [[TMP7 ]], i64 4
32+ ; SSE-NEXT: store <4 x i32> [[TMP6 ]], ptr [[TMP8 ]], align 4
3333; SSE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
34- ; SSE-NEXT: [[TMP13 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
35- ; SSE-NEXT: br i1 [[TMP13 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
34+ ; SSE-NEXT: [[TMP9 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
35+ ; SSE-NEXT: br i1 [[TMP9 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3636; SSE: middle.block:
3737; SSE-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
3838; SSE: scalar.ph:
3939; SSE-NEXT: br label [[FOR_BODY:%.*]]
4040; SSE: for.cond.cleanup:
4141; SSE-NEXT: ret void
4242; SSE: for.body:
43- ; SSE-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2 :![0-9]+]]
43+ ; SSE-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
4444;
4545; AVX1-LABEL: @foo(
4646; AVX1-NEXT: entry:
@@ -72,29 +72,29 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
7272; AVX1-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
7373; AVX1-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
7474; AVX1-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <8 x i32> [[WIDE_VEC3]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
75- ; AVX1-NEXT: [[TMP15 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
76- ; AVX1-NEXT: [[TMP16 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
77- ; AVX1-NEXT: [[TMP17 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
78- ; AVX1-NEXT: [[TMP18 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
79- ; AVX1-NEXT: [[TMP19 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
80- ; AVX1-NEXT: store <4 x i32> [[TMP15 ]], ptr [[TMP19 ]], align 4
81- ; AVX1-NEXT: [[TMP21 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 4
82- ; AVX1-NEXT: store <4 x i32> [[TMP16 ]], ptr [[TMP21 ]], align 4
83- ; AVX1-NEXT: [[TMP23 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 8
84- ; AVX1-NEXT: store <4 x i32> [[TMP17 ]], ptr [[TMP23 ]], align 4
85- ; AVX1-NEXT: [[TMP25 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 12
86- ; AVX1-NEXT: store <4 x i32> [[TMP18 ]], ptr [[TMP25 ]], align 4
75+ ; AVX1-NEXT: [[TMP11 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
76+ ; AVX1-NEXT: [[TMP12 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
77+ ; AVX1-NEXT: [[TMP13 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
78+ ; AVX1-NEXT: [[TMP14 :%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
79+ ; AVX1-NEXT: [[TMP15 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
80+ ; AVX1-NEXT: store <4 x i32> [[TMP11 ]], ptr [[TMP15 ]], align 4
81+ ; AVX1-NEXT: [[TMP16 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 4
82+ ; AVX1-NEXT: store <4 x i32> [[TMP12 ]], ptr [[TMP16 ]], align 4
83+ ; AVX1-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 8
84+ ; AVX1-NEXT: store <4 x i32> [[TMP13 ]], ptr [[TMP17 ]], align 4
85+ ; AVX1-NEXT: [[TMP18 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 12
86+ ; AVX1-NEXT: store <4 x i32> [[TMP14 ]], ptr [[TMP18 ]], align 4
8787; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
88- ; AVX1-NEXT: [[TMP27 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
89- ; AVX1-NEXT: br i1 [[TMP27 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
88+ ; AVX1-NEXT: [[TMP19 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
89+ ; AVX1-NEXT: br i1 [[TMP19 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
9090; AVX1: middle.block:
9191; AVX1-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
9292; AVX1: scalar.ph:
9393; AVX1-NEXT: br label [[FOR_BODY:%.*]]
9494; AVX1: for.cond.cleanup:
9595; AVX1-NEXT: ret void
9696; AVX1: for.body:
97- ; AVX1-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2 :![0-9]+]]
97+ ; AVX1-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
9898;
9999; AVX2-LABEL: @foo(
100100; AVX2-NEXT: entry:
@@ -126,29 +126,29 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
126126; AVX2-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
127127; AVX2-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
128128; AVX2-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <16 x i32> [[WIDE_VEC3]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
129- ; AVX2-NEXT: [[TMP15 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
130- ; AVX2-NEXT: [[TMP16 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
131- ; AVX2-NEXT: [[TMP17 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
132- ; AVX2-NEXT: [[TMP18 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
133- ; AVX2-NEXT: [[TMP19 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
134- ; AVX2-NEXT: store <8 x i32> [[TMP15 ]], ptr [[TMP19 ]], align 4
135- ; AVX2-NEXT: [[TMP21 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 8
136- ; AVX2-NEXT: store <8 x i32> [[TMP16 ]], ptr [[TMP21 ]], align 4
137- ; AVX2-NEXT: [[TMP23 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 16
138- ; AVX2-NEXT: store <8 x i32> [[TMP17 ]], ptr [[TMP23 ]], align 4
139- ; AVX2-NEXT: [[TMP25 :%.*]] = getelementptr inbounds i32, ptr [[TMP19 ]], i64 24
140- ; AVX2-NEXT: store <8 x i32> [[TMP18 ]], ptr [[TMP25 ]], align 4
129+ ; AVX2-NEXT: [[TMP11 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
130+ ; AVX2-NEXT: [[TMP12 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
131+ ; AVX2-NEXT: [[TMP13 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
132+ ; AVX2-NEXT: [[TMP14 :%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
133+ ; AVX2-NEXT: [[TMP15 :%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
134+ ; AVX2-NEXT: store <8 x i32> [[TMP11 ]], ptr [[TMP15 ]], align 4
135+ ; AVX2-NEXT: [[TMP16 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 8
136+ ; AVX2-NEXT: store <8 x i32> [[TMP12 ]], ptr [[TMP16 ]], align 4
137+ ; AVX2-NEXT: [[TMP17 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 16
138+ ; AVX2-NEXT: store <8 x i32> [[TMP13 ]], ptr [[TMP17 ]], align 4
139+ ; AVX2-NEXT: [[TMP18 :%.*]] = getelementptr inbounds i32, ptr [[TMP15 ]], i64 24
140+ ; AVX2-NEXT: store <8 x i32> [[TMP14 ]], ptr [[TMP18 ]], align 4
141141; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
142- ; AVX2-NEXT: [[TMP27 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
143- ; AVX2-NEXT: br i1 [[TMP27 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
142+ ; AVX2-NEXT: [[TMP19 :%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
143+ ; AVX2-NEXT: br i1 [[TMP19 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
144144; AVX2: middle.block:
145145; AVX2-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
146146; AVX2: scalar.ph:
147147; AVX2-NEXT: br label [[FOR_BODY:%.*]]
148148; AVX2: for.cond.cleanup:
149149; AVX2-NEXT: ret void
150150; AVX2: for.body:
151- ; AVX2-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2 :![0-9]+]]
151+ ; AVX2-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
152152;
153153; ATOM-LABEL: @foo(
154154; ATOM-NEXT: entry:
0 commit comments