Skip to content

Commit 6d17551

Browse files
committed
Add tests for different success/failure exits
1 parent db2fbc6 commit 6d17551

File tree

2 files changed

+220
-1
lines changed

2 files changed

+220
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
//
6161
// NOTE: This Pass matches really specific loop patterns because it's only
6262
// supposed to be a temporary solution until our LoopVectorizer is powerful
63-
// enought to vectorize them automatically.
63+
// enough to vectorize them automatically.
6464
//
6565
//===----------------------------------------------------------------------===//
6666

llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,176 @@ exit:
320320
ret ptr %res
321321
}
322322

323+
; Same as @find_first_of_i8 but with two intermediate exit blocks for the
324+
; "success" (exit_succ) and "failure" (exit_fail) paths. Each of them feeds the
; %res phi in the common exit block; the failure value is %search_end on both
; of its incoming edges, so no loop-defined value is used on the failure path.
325+
define ptr @find_first_of_i8_multi_exit(ptr %search_start, ptr %search_end, ptr %needle_start, ptr %needle_end) #0 {
326+
; CHECK-LABEL: define ptr @find_first_of_i8_multi_exit(
327+
; CHECK-SAME: ptr [[SEARCH_START:%.*]], ptr [[SEARCH_END:%.*]], ptr [[NEEDLE_START:%.*]], ptr [[NEEDLE_END:%.*]]) #[[ATTR0]] {
328+
; CHECK-NEXT: [[ENTRY:.*]]:
329+
; CHECK-NEXT: [[SEARCH_TEST:%.*]] = icmp eq ptr [[SEARCH_START]], [[SEARCH_END]]
330+
; CHECK-NEXT: [[NEEDLE_TEST:%.*]] = icmp eq ptr [[NEEDLE_START]], [[NEEDLE_END]]
331+
; CHECK-NEXT: [[COMBINED_TEST:%.*]] = or i1 [[SEARCH_TEST]], [[NEEDLE_TEST]]
332+
; CHECK-NEXT: br i1 [[COMBINED_TEST]], label %[[EXIT_FAIL:.*]], label %[[HEADER_PREHEADER:.*]]
333+
; CHECK: [[HEADER_PREHEADER]]:
334+
; CHECK-NEXT: br label %[[MEM_CHECK:.*]]
335+
; CHECK: [[MEM_CHECK]]:
336+
; CHECK-NEXT: [[SEARCH_START_INT:%.*]] = ptrtoint ptr [[SEARCH_START]] to i64
337+
; CHECK-NEXT: [[SEARCH_END_INT:%.*]] = ptrtoint ptr [[SEARCH_END]] to i64
338+
; CHECK-NEXT: [[NEEDLE_START_INT:%.*]] = ptrtoint ptr [[NEEDLE_START]] to i64
339+
; CHECK-NEXT: [[NEEDLE_END_INT:%.*]] = ptrtoint ptr [[NEEDLE_END]] to i64
340+
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 16)
341+
; CHECK-NEXT: [[SEARCH_START_PAGE:%.*]] = lshr i64 [[SEARCH_START_INT]], 12
342+
; CHECK-NEXT: [[SEARCH_END_PAGE:%.*]] = lshr i64 [[SEARCH_END_INT]], 12
343+
; CHECK-NEXT: [[NEEDLE_START_PAGE:%.*]] = lshr i64 [[NEEDLE_START_INT]], 12
344+
; CHECK-NEXT: [[NEEDLE_END_PAGE:%.*]] = lshr i64 [[NEEDLE_END_INT]], 12
345+
; CHECK-NEXT: [[SEARCH_PAGE_CMP:%.*]] = icmp ne i64 [[SEARCH_START_PAGE]], [[SEARCH_END_PAGE]]
346+
; CHECK-NEXT: [[NEEDLE_PAGE_CMP:%.*]] = icmp ne i64 [[NEEDLE_START_PAGE]], [[NEEDLE_END_PAGE]]
347+
; CHECK-NEXT: [[COMBINED_PAGE_CMP:%.*]] = or i1 [[SEARCH_PAGE_CMP]], [[NEEDLE_PAGE_CMP]]
348+
; CHECK-NEXT: br i1 [[COMBINED_PAGE_CMP]], label %[[SCALAR_PREHEADER:.*]], label %[[FIND_FIRST_VEC_HEADER:.*]], !prof [[PROF0]]
349+
; CHECK: [[FIND_FIRST_VEC_HEADER]]:
350+
; CHECK-NEXT: [[PSEARCH:%.*]] = phi ptr [ [[SEARCH_START]], %[[MEM_CHECK]] ], [ [[SEARCH_NEXT_VEC:%.*]], %[[SEARCH_CHECK_VEC:.*]] ]
351+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PSEARCH]] to i64
352+
; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
353+
; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[SEARCH_PRED]]
354+
; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[PSEARCH]], i32 1, <vscale x 16 x i1> [[SEARCH_MASKED]], <vscale x 16 x i8> zeroinitializer)
355+
; CHECK-NEXT: br label %[[MATCH_CHECK_VEC:.*]]
356+
; CHECK: [[MATCH_CHECK_VEC]]:
357+
; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC:.*]] ]
358+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
359+
; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
360+
; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[NEEDLE_PRED]]
361+
; CHECK-NEXT: [[NEEDLE_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[PNEEDLE]], i32 1, <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> zeroinitializer)
362+
; CHECK-NEXT: [[NEEDLE0:%.*]] = extractelement <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], i64 0
363+
; CHECK-NEXT: [[NEEDLE0_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[NEEDLE0]], i64 0
364+
; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[NEEDLE0_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
365+
; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], <vscale x 16 x i8> [[NEEDLE0_SPLAT]]
366+
; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[NEEDLE_SPLAT]], i64 0)
367+
; CHECK-NEXT: [[MATCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
368+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
369+
; CHECK-NEXT: br i1 [[TMP3]], label %[[CALCULATE_MATCH:.*]], label %[[NEEDLE_CHECK_VEC]]
370+
; CHECK: [[CALCULATE_MATCH]]:
371+
; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
372+
; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
373+
; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[MATCH_VEC]], i1 true)
374+
; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i8, ptr [[MATCH_START]], i64 [[MATCH_IDX]]
375+
; CHECK-NEXT: br label %[[EXIT_SUCC:.*]]
376+
; CHECK: [[NEEDLE_CHECK_VEC]]:
377+
; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
378+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
379+
; CHECK-NEXT: br i1 [[TMP4]], label %[[MATCH_CHECK_VEC]], label %[[SEARCH_CHECK_VEC]]
380+
; CHECK: [[SEARCH_CHECK_VEC]]:
381+
; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i8, ptr [[PSEARCH]], i64 16
382+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
383+
; CHECK-NEXT: br i1 [[TMP5]], label %[[FIND_FIRST_VEC_HEADER]], label %[[EXIT_FAIL_LOOPEXIT:.*]]
384+
; CHECK: [[SCALAR_PREHEADER]]:
385+
; CHECK-NEXT: br label %[[HEADER:.*]]
386+
; CHECK: [[HEADER]]:
387+
; CHECK-NEXT: [[SEARCH_PTR:%.*]] = phi ptr [ [[SEARCH_NEXT:%.*]], %[[SEARCH_CHECK:.*]] ], [ [[SEARCH_START]], %[[SCALAR_PREHEADER]] ]
388+
; CHECK-NEXT: [[SEARCH_LOAD:%.*]] = load i8, ptr [[SEARCH_PTR]], align 1
389+
; CHECK-NEXT: br label %[[MATCH_CHECK:.*]]
390+
; CHECK: [[NEEDLE_CHECK:.*]]:
391+
; CHECK-NEXT: [[NEEDLE_NEXT:%.*]] = getelementptr inbounds i8, ptr [[NEEDLE_PTR:%.*]], i64 1
392+
; CHECK-NEXT: [[NEEDLE_CMP:%.*]] = icmp eq ptr [[NEEDLE_NEXT]], [[NEEDLE_END]]
393+
; CHECK-NEXT: br i1 [[NEEDLE_CMP]], label %[[SEARCH_CHECK]], label %[[MATCH_CHECK]]
394+
; CHECK: [[MATCH_CHECK]]:
395+
; CHECK-NEXT: [[NEEDLE_PTR]] = phi ptr [ [[NEEDLE_START]], %[[HEADER]] ], [ [[NEEDLE_NEXT]], %[[NEEDLE_CHECK]] ]
396+
; CHECK-NEXT: [[NEEDLE_LOAD:%.*]] = load i8, ptr [[NEEDLE_PTR]], align 1
397+
; CHECK-NEXT: [[MATCH_CMP:%.*]] = icmp eq i8 [[SEARCH_LOAD]], [[NEEDLE_LOAD]]
398+
; CHECK-NEXT: br i1 [[MATCH_CMP]], label %[[EXIT_SUCC]], label %[[NEEDLE_CHECK]]
399+
; CHECK: [[SEARCH_CHECK]]:
400+
; CHECK-NEXT: [[SEARCH_NEXT]] = getelementptr inbounds i8, ptr [[SEARCH_PTR]], i64 1
401+
; CHECK-NEXT: [[SEARCH_CMP:%.*]] = icmp eq ptr [[SEARCH_NEXT]], [[SEARCH_END]]
402+
; CHECK-NEXT: br i1 [[SEARCH_CMP]], label %[[EXIT_FAIL_LOOPEXIT]], label %[[HEADER]]
403+
; CHECK: [[EXIT_SUCC]]:
404+
; CHECK-NEXT: [[RES_SUCC:%.*]] = phi ptr [ [[SEARCH_PTR]], %[[MATCH_CHECK]] ], [ [[MATCH_RES]], %[[CALCULATE_MATCH]] ]
405+
; CHECK-NEXT: br label %[[EXIT:.*]]
406+
; CHECK: [[EXIT_FAIL_LOOPEXIT]]:
407+
; CHECK-NEXT: br label %[[EXIT_FAIL]]
408+
; CHECK: [[EXIT_FAIL]]:
409+
; CHECK-NEXT: [[RES_FAIL:%.*]] = phi ptr [ [[SEARCH_END]], %[[ENTRY]] ], [ [[SEARCH_END]], %[[EXIT_FAIL_LOOPEXIT]] ]
410+
; CHECK-NEXT: br label %[[EXIT]]
411+
; CHECK: [[EXIT]]:
412+
; CHECK-NEXT: [[RES:%.*]] = phi ptr [ [[RES_SUCC]], %[[EXIT_SUCC]] ], [ [[RES_FAIL]], %[[EXIT_FAIL]] ]
413+
; CHECK-NEXT: ret ptr [[RES]]
414+
;
415+
; DISABLE-LABEL: define ptr @find_first_of_i8_multi_exit(
416+
; DISABLE-SAME: ptr [[SEARCH_START:%.*]], ptr [[SEARCH_END:%.*]], ptr [[NEEDLE_START:%.*]], ptr [[NEEDLE_END:%.*]]) #[[ATTR0]] {
417+
; DISABLE-NEXT: [[ENTRY:.*]]:
418+
; DISABLE-NEXT: [[SEARCH_TEST:%.*]] = icmp eq ptr [[SEARCH_START]], [[SEARCH_END]]
419+
; DISABLE-NEXT: [[NEEDLE_TEST:%.*]] = icmp eq ptr [[NEEDLE_START]], [[NEEDLE_END]]
420+
; DISABLE-NEXT: [[COMBINED_TEST:%.*]] = or i1 [[SEARCH_TEST]], [[NEEDLE_TEST]]
421+
; DISABLE-NEXT: br i1 [[COMBINED_TEST]], label %[[EXIT_FAIL:.*]], label %[[HEADER_PREHEADER:.*]]
422+
; DISABLE: [[HEADER_PREHEADER]]:
423+
; DISABLE-NEXT: br label %[[HEADER:.*]]
424+
; DISABLE: [[HEADER]]:
425+
; DISABLE-NEXT: [[SEARCH_PTR:%.*]] = phi ptr [ [[SEARCH_NEXT:%.*]], %[[SEARCH_CHECK:.*]] ], [ [[SEARCH_START]], %[[HEADER_PREHEADER]] ]
426+
; DISABLE-NEXT: [[SEARCH_LOAD:%.*]] = load i8, ptr [[SEARCH_PTR]], align 1
427+
; DISABLE-NEXT: br label %[[MATCH_CHECK:.*]]
428+
; DISABLE: [[NEEDLE_CHECK:.*]]:
429+
; DISABLE-NEXT: [[NEEDLE_NEXT:%.*]] = getelementptr inbounds i8, ptr [[NEEDLE_PTR:%.*]], i64 1
430+
; DISABLE-NEXT: [[NEEDLE_CMP:%.*]] = icmp eq ptr [[NEEDLE_NEXT]], [[NEEDLE_END]]
431+
; DISABLE-NEXT: br i1 [[NEEDLE_CMP]], label %[[SEARCH_CHECK]], label %[[MATCH_CHECK]]
432+
; DISABLE: [[MATCH_CHECK]]:
433+
; DISABLE-NEXT: [[NEEDLE_PTR]] = phi ptr [ [[NEEDLE_START]], %[[HEADER]] ], [ [[NEEDLE_NEXT]], %[[NEEDLE_CHECK]] ]
434+
; DISABLE-NEXT: [[NEEDLE_LOAD:%.*]] = load i8, ptr [[NEEDLE_PTR]], align 1
435+
; DISABLE-NEXT: [[MATCH_CMP:%.*]] = icmp eq i8 [[SEARCH_LOAD]], [[NEEDLE_LOAD]]
436+
; DISABLE-NEXT: br i1 [[MATCH_CMP]], label %[[EXIT_SUCC:.*]], label %[[NEEDLE_CHECK]]
437+
; DISABLE: [[SEARCH_CHECK]]:
438+
; DISABLE-NEXT: [[SEARCH_NEXT]] = getelementptr inbounds i8, ptr [[SEARCH_PTR]], i64 1
439+
; DISABLE-NEXT: [[SEARCH_CMP:%.*]] = icmp eq ptr [[SEARCH_NEXT]], [[SEARCH_END]]
440+
; DISABLE-NEXT: br i1 [[SEARCH_CMP]], label %[[EXIT_FAIL_LOOPEXIT:.*]], label %[[HEADER]]
441+
; DISABLE: [[EXIT_SUCC]]:
442+
; DISABLE-NEXT: [[RES_SUCC:%.*]] = phi ptr [ [[SEARCH_PTR]], %[[MATCH_CHECK]] ]
443+
; DISABLE-NEXT: br label %[[EXIT:.*]]
444+
; DISABLE: [[EXIT_FAIL_LOOPEXIT]]:
445+
; DISABLE-NEXT: br label %[[EXIT_FAIL]]
446+
; DISABLE: [[EXIT_FAIL]]:
447+
; DISABLE-NEXT: [[RES_FAIL:%.*]] = phi ptr [ [[SEARCH_END]], %[[ENTRY]] ], [ [[SEARCH_END]], %[[EXIT_FAIL_LOOPEXIT]] ]
448+
; DISABLE-NEXT: br label %[[EXIT]]
449+
; DISABLE: [[EXIT]]:
450+
; DISABLE-NEXT: [[RES:%.*]] = phi ptr [ [[RES_SUCC]], %[[EXIT_SUCC]] ], [ [[RES_FAIL]], %[[EXIT_FAIL]] ]
451+
; DISABLE-NEXT: ret ptr [[RES]]
452+
;
453+
entry:
454+
%search_test = icmp eq ptr %search_start, %search_end
455+
%needle_test = icmp eq ptr %needle_start, %needle_end
456+
%combined_test = or i1 %search_test, %needle_test
457+
br i1 %combined_test, label %exit_fail, label %header  ; either start pointer equals its end pointer -> bypass the loop via exit_fail
458+
459+
header:  ; outer loop: loads one byte from %search_ptr per iteration
460+
%search_ptr = phi ptr [ %search_next, %search_check ], [ %search_start, %entry ]
461+
%search_load = load i8, ptr %search_ptr, align 1
462+
br label %match_check
463+
464+
needle_check:  ; no match for this needle byte: step the needle pointer by one
465+
%needle_next = getelementptr inbounds i8, ptr %needle_ptr, i64 1
466+
%needle_cmp = icmp eq ptr %needle_next, %needle_end
467+
br i1 %needle_cmp, label %search_check, label %match_check  ; reached %needle_end -> move on to the next search byte
468+
469+
match_check:  ; inner loop: compare %search_load with the byte at %needle_ptr
470+
%needle_ptr = phi ptr [ %needle_start, %header ], [ %needle_next, %needle_check ]
471+
%needle_load = load i8, ptr %needle_ptr, align 1
472+
%match_cmp = icmp eq i8 %search_load, %needle_load
473+
br i1 %match_cmp, label %exit_succ, label %needle_check  ; equal bytes -> leave both loops through exit_succ
474+
475+
search_check:  ; needle exhausted for this search byte: step the search pointer by one
476+
%search_next = getelementptr inbounds i8, ptr %search_ptr, i64 1
477+
%search_cmp = icmp eq ptr %search_next, %search_end
478+
br i1 %search_cmp, label %exit_fail, label %header  ; reached %search_end -> leave through exit_fail
479+
480+
exit_succ:  ; intermediate success block: forwards the matching %search_ptr
481+
%res_succ = phi ptr [ %search_ptr, %match_check ]
482+
br label %exit
483+
484+
exit_fail:  ; intermediate failure block: %search_end on both incoming edges
485+
%res_fail = phi ptr [ %search_end, %entry ], [ %search_end, %search_check ]
486+
br label %exit
487+
488+
exit:  ; common exit: merges the success and failure results
489+
%res = phi ptr [ %res_succ, %exit_succ ], [ %res_fail, %exit_fail ]
490+
ret ptr %res
491+
}
492+
323493
; From here on we only test for the presence/absence of the intrinsic.
324494
; UTC_ARGS: --disable
325495

@@ -447,6 +617,55 @@ exit:
447617
ret ptr %res
448618
}
449619

620+
; Same as @find_first_of_i8_multi_exit but %search_ptr is used in exit_fail
621+
; (as an incoming value of the %res_fail phi), which should block the transform.
622+
define ptr @find_first_of_i8_multi_exit_outside_use(ptr %search_start, ptr %search_end, ptr %needle_start, ptr %needle_end) #0 {
623+
; CHECK-LABEL: define ptr @find_first_of_i8_multi_exit_outside_use(
624+
; CHECK-NOT: {{%.*}} @llvm.experimental.vector.match{{.*}}
625+
;
626+
; DISABLE-LABEL: define ptr @find_first_of_i8_multi_exit_outside_use(
627+
; DISABLE-NOT: {{%.*}} @llvm.experimental.vector.match{{.*}}
628+
;
629+
entry:
630+
%search_test = icmp eq ptr %search_start, %search_end
631+
%needle_test = icmp eq ptr %needle_start, %needle_end
632+
%combined_test = or i1 %search_test, %needle_test
633+
br i1 %combined_test, label %exit_fail, label %header  ; either start pointer equals its end pointer -> bypass the loop via exit_fail
634+
635+
header:  ; outer loop: loads one byte from %search_ptr per iteration
636+
%search_ptr = phi ptr [ %search_next, %search_check ], [ %search_start, %entry ]
637+
%search_load = load i8, ptr %search_ptr, align 1
638+
br label %match_check
639+
640+
needle_check:  ; no match for this needle byte: step the needle pointer by one
641+
%needle_next = getelementptr inbounds i8, ptr %needle_ptr, i64 1
642+
%needle_cmp = icmp eq ptr %needle_next, %needle_end
643+
br i1 %needle_cmp, label %search_check, label %match_check  ; reached %needle_end -> move on to the next search byte
644+
645+
match_check:  ; inner loop: compare %search_load with the byte at %needle_ptr
646+
%needle_ptr = phi ptr [ %needle_start, %header ], [ %needle_next, %needle_check ]
647+
%needle_load = load i8, ptr %needle_ptr, align 1
648+
%match_cmp = icmp eq i8 %search_load, %needle_load
649+
br i1 %match_cmp, label %exit_succ, label %needle_check  ; equal bytes -> leave both loops through exit_succ
650+
651+
search_check:  ; needle exhausted for this search byte: step the search pointer by one
652+
%search_next = getelementptr inbounds i8, ptr %search_ptr, i64 1
653+
%search_cmp = icmp eq ptr %search_next, %search_end
654+
br i1 %search_cmp, label %exit_fail, label %header  ; reached %search_end -> leave through exit_fail
655+
656+
exit_succ:  ; intermediate success block: forwards the matching %search_ptr
657+
%res_succ = phi ptr [ %search_ptr, %match_check ]
658+
br label %exit
659+
660+
exit_fail:  ; intermediate failure block
661+
%res_fail = phi ptr [ %search_end, %entry ], [ %search_ptr, %search_check ]  ; %search_ptr (defined in the loop) is used on the failure edge: the use this test is about
662+
br label %exit
663+
664+
exit:  ; common exit: merges the success and failure results
665+
%res = phi ptr [ %res_succ, %exit_succ ], [ %res_fail, %exit_fail ]
666+
ret ptr %res
667+
}
668+
450669
attributes #0 = { "target-features"="+sve2" }
451670

452671
; CHECK: [[PROF0]] = !{!"branch_weights", i32 10, i32 90}

0 commit comments

Comments
 (0)