@@ -58,17 +58,12 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr %A, i64 %N) nosync nofree {
5858; VF8UF2: [[VECTOR_PH]]:
5959; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
6060; VF8UF2: [[VECTOR_BODY]]:
61- ; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
62- ; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
63- ; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
61+ ; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
6462; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
6563; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
6664; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
67- ; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
6865; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
69- ; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
70- ; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
71- ; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
66+ ; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
7267; VF8UF2: [[MIDDLE_SPLIT]]:
7368; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
7469; VF8UF2: [[MIDDLE_BLOCK]]:
@@ -87,7 +82,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr %A, i64 %N) nosync nofree {
8782; VF8UF2: [[LOOP_LATCH]]:
8883; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
8984; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
90- ; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3 :![0-9]+]]
85+ ; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0 :![0-9]+]]
9186; VF8UF2: [[EXIT]]:
9287; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
9388; VF8UF2-NEXT: ret i8 [[RES]]
@@ -100,17 +95,12 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr %A, i64 %N) nosync nofree {
10095; VF16UF1: [[VECTOR_PH]]:
10196; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
10297; VF16UF1: [[VECTOR_BODY]]:
103- ; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
104- ; VF16UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
105- ; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
98+ ; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
10699; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
107100; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
108101; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
109- ; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
110102; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
111- ; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
112- ; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
113- ; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
103+ ; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
114104; VF16UF1: [[MIDDLE_SPLIT]]:
115105; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
116106; VF16UF1: [[MIDDLE_BLOCK]]:
@@ -129,7 +119,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr %A, i64 %N) nosync nofree {
129119; VF16UF1: [[LOOP_LATCH]]:
130120; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
131121; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
132- ; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3 :![0-9]+]]
122+ ; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0 :![0-9]+]]
133123; VF16UF1: [[EXIT]]:
134124; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
135125; VF16UF1-NEXT: ret i8 [[RES]]
@@ -219,11 +209,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
219209; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
220210; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
221211; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
222- ; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
223- ; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
224212; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
225- ; VF8UF2-NEXT: [[TMP6 :%.*]] = or i1 [[TMP4]], [[TMP5]]
226- ; VF8UF2-NEXT: br i1 [[TMP6]] , label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
213+ ; VF8UF2-NEXT: [[TMP4 :%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
214+ ; VF8UF2-NEXT: br i1 true , label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
227215; VF8UF2: [[MIDDLE_SPLIT]]:
228216; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
229217; VF8UF2: [[MIDDLE_BLOCK]]:
@@ -244,7 +232,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
244232; VF8UF2: [[LOOP_LATCH]]:
245233; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
246234; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
247- ; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5 :![0-9]+]]
235+ ; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4 :![0-9]+]]
248236; VF8UF2: [[EXIT]]:
249237; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
250238; VF8UF2-NEXT: ret i64 [[RES]]
@@ -265,11 +253,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
265253; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
266254; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
267255; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
268- ; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
269- ; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
270256; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
271- ; VF16UF1-NEXT: [[TMP6 :%.*]] = or i1 [[TMP4]], [[TMP5]]
272- ; VF16UF1-NEXT: br i1 [[TMP6]] , label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
257+ ; VF16UF1-NEXT: [[TMP4 :%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
258+ ; VF16UF1-NEXT: br i1 true , label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3 :![0-9]+]]
273259; VF16UF1: [[MIDDLE_SPLIT]]:
274260; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
275261; VF16UF1: [[MIDDLE_BLOCK]]:
@@ -290,7 +276,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64
290276; VF16UF1: [[LOOP_LATCH]]:
291277; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
292278; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
293- ; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5 :![0-9]+]]
279+ ; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4 :![0-9]+]]
294280; VF16UF1: [[EXIT]]:
295281; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
296282; VF16UF1-NEXT: ret i64 [[RES]]
0 commit comments