Skip to content

Commit aaa2e6c

Browse files
rj-jesus authored and sjoerdmeijer committed
[LoopIdiomVectorize] Fix FindFirstByte successors
This refactors fixSuccessorPhis from LoopIdiomVectorize::transformByteCompare and uses it in LoopIdiomVectorize::expandFindFirstByte to ensure that all successor Phis have incoming values from the vector basic blocks. Fixes #156588.
1 parent 69d0c3e commit aaa2e6c

File tree

2 files changed

+188
-51
lines changed

2 files changed

+188
-51
lines changed

llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp

Lines changed: 48 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -170,10 +170,10 @@ class LoopIdiomVectorize {
170170
bool recognizeFindFirstByte();
171171

172172
Value *expandFindFirstByte(IRBuilder<> &Builder, DomTreeUpdater &DTU,
173-
unsigned VF, Type *CharTy, BasicBlock *ExitSucc,
174-
BasicBlock *ExitFail, Value *SearchStart,
175-
Value *SearchEnd, Value *NeedleStart,
176-
Value *NeedleEnd);
173+
unsigned VF, Type *CharTy, Value *IndPhi,
174+
BasicBlock *ExitSucc, BasicBlock *ExitFail,
175+
Value *SearchStart, Value *SearchEnd,
176+
Value *NeedleStart, Value *NeedleEnd);
177177

178178
void transformFindFirstByte(PHINode *IndPhi, unsigned VF, Type *CharTy,
179179
BasicBlock *ExitSucc, BasicBlock *ExitFail,
@@ -242,6 +242,37 @@ bool LoopIdiomVectorize::run(Loop *L) {
242242
return false;
243243
}
244244

245+
static void fixSuccessorPhis(Loop *L, Value *ScalarRes, Value *VectorRes,
246+
BasicBlock *SuccBB, BasicBlock *IncBB) {
247+
for (PHINode &PN : SuccBB->phis()) {
248+
// Look through the incoming values to find ScalarRes, meaning this is a
249+
// PHI collecting the results of the transformation.
250+
bool ResPhi = false;
251+
for (Value *Op : PN.incoming_values())
252+
if (Op == ScalarRes) {
253+
ResPhi = true;
254+
break;
255+
}
256+
257+
// Any PHI that depended upon the result of the transformation needs a new
258+
// incoming value from IncBB.
259+
if (ResPhi)
260+
PN.addIncoming(VectorRes, IncBB);
261+
else {
262+
// There should be no other outside uses of other values in the
263+
// original loop. Any incoming values should either:
264+
// 1. Be for blocks outside the loop, which aren't interesting. Or ..
265+
// 2. These are from blocks in the loop with values defined outside
266+
// the loop. We should add a similar incoming value from IncBB.
267+
for (BasicBlock *BB : PN.blocks())
268+
if (L->contains(BB)) {
269+
PN.addIncoming(PN.getIncomingValueForBlock(BB), IncBB);
270+
break;
271+
}
272+
}
273+
}
274+
}
275+
245276
bool LoopIdiomVectorize::recognizeByteCompare() {
246277
// Currently the transformation only works on scalable vector types, although
247278
// there is no fundamental reason why it cannot be made to work for fixed
@@ -940,42 +971,10 @@ void LoopIdiomVectorize::transformByteCompare(GetElementPtrInst *GEPA,
940971
DTU.applyUpdates({{DominatorTree::Insert, CmpBB, FoundBB}});
941972
}
942973

943-
auto fixSuccessorPhis = [&](BasicBlock *SuccBB) {
944-
for (PHINode &PN : SuccBB->phis()) {
945-
// At this point we've already replaced all uses of the result from the
946-
// loop with ByteCmp. Look through the incoming values to find ByteCmp,
947-
// meaning this is a Phi collecting the results of the byte compare.
948-
bool ResPhi = false;
949-
for (Value *Op : PN.incoming_values())
950-
if (Op == ByteCmpRes) {
951-
ResPhi = true;
952-
break;
953-
}
954-
955-
// Any PHI that depended upon the result of the byte compare needs a new
956-
// incoming value from CmpBB. This is because the original loop will get
957-
// deleted.
958-
if (ResPhi)
959-
PN.addIncoming(ByteCmpRes, CmpBB);
960-
else {
961-
// There should be no other outside uses of other values in the
962-
// original loop. Any incoming values should either:
963-
// 1. Be for blocks outside the loop, which aren't interesting. Or ..
964-
// 2. These are from blocks in the loop with values defined outside
965-
// the loop. We should a similar incoming value from CmpBB.
966-
for (BasicBlock *BB : PN.blocks())
967-
if (CurLoop->contains(BB)) {
968-
PN.addIncoming(PN.getIncomingValueForBlock(BB), CmpBB);
969-
break;
970-
}
971-
}
972-
}
973-
};
974-
975974
// Ensure all Phis in the successors of CmpBB have an incoming value from it.
976-
fixSuccessorPhis(EndBB);
975+
fixSuccessorPhis(CurLoop, ByteCmpRes, ByteCmpRes, EndBB, CmpBB);
977976
if (EndBB != FoundBB)
978-
fixSuccessorPhis(FoundBB);
977+
fixSuccessorPhis(CurLoop, ByteCmpRes, ByteCmpRes, FoundBB, CmpBB);
979978

980979
// The new CmpBB block isn't part of the loop, but will need to be added to
981980
// the outer loop if there is one.
@@ -1173,8 +1172,9 @@ bool LoopIdiomVectorize::recognizeFindFirstByte() {
11731172

11741173
Value *LoopIdiomVectorize::expandFindFirstByte(
11751174
IRBuilder<> &Builder, DomTreeUpdater &DTU, unsigned VF, Type *CharTy,
1176-
BasicBlock *ExitSucc, BasicBlock *ExitFail, Value *SearchStart,
1177-
Value *SearchEnd, Value *NeedleStart, Value *NeedleEnd) {
1175+
Value *IndPhi, BasicBlock *ExitSucc, BasicBlock *ExitFail,
1176+
Value *SearchStart, Value *SearchEnd, Value *NeedleStart,
1177+
Value *NeedleEnd) {
11781178
// Set up some types and constants that we intend to reuse.
11791179
auto *PtrTy = Builder.getPtrTy();
11801180
auto *I64Ty = Builder.getInt64Ty();
@@ -1374,6 +1374,12 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
13741374
MatchLCSSA->addIncoming(Search, BB2);
13751375
MatchPredLCSSA->addIncoming(MatchPred, BB2);
13761376

1377+
// Ensure all Phis in the successors of BB3/BB5 have an incoming value from
1378+
// them.
1379+
fixSuccessorPhis(CurLoop, IndPhi, MatchVal, ExitSucc, BB3);
1380+
if (ExitSucc != ExitFail)
1381+
fixSuccessorPhis(CurLoop, IndPhi, MatchVal, ExitFail, BB5);
1382+
13771383
if (VerifyLoops) {
13781384
OuterLoop->verifyLoop();
13791385
InnerLoop->verifyLoop();
@@ -1395,21 +1401,12 @@ void LoopIdiomVectorize::transformFindFirstByte(
13951401
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
13961402
Builder.SetCurrentDebugLocation(PHBranch->getDebugLoc());
13971403

1398-
Value *MatchVal =
1399-
expandFindFirstByte(Builder, DTU, VF, CharTy, ExitSucc, ExitFail,
1400-
SearchStart, SearchEnd, NeedleStart, NeedleEnd);
1404+
expandFindFirstByte(Builder, DTU, VF, CharTy, IndPhi, ExitSucc, ExitFail,
1405+
SearchStart, SearchEnd, NeedleStart, NeedleEnd);
14011406

14021407
assert(PHBranch->isUnconditional() &&
14031408
"Expected preheader to terminate with an unconditional branch.");
14041409

1405-
// Add new incoming values with the result of the transformation to PHINodes
1406-
// of ExitSucc that use IndPhi.
1407-
for (auto *U : llvm::make_early_inc_range(IndPhi->users())) {
1408-
auto *PN = dyn_cast<PHINode>(U);
1409-
if (PN && PN->getParent() == ExitSucc)
1410-
PN->addIncoming(MatchVal, cast<Instruction>(MatchVal)->getParent());
1411-
}
1412-
14131410
if (VerifyLoops && CurLoop->getParentLoop()) {
14141411
CurLoop->getParentLoop()->verifyLoop();
14151412
if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))

llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll

Lines changed: 140 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -490,6 +490,146 @@ exit:
490490
ret ptr %res
491491
}
492492

493+
define ptr @ensure_not_found_successors_fixed(ptr %search_start, ptr %search_end, ptr %needle_start, ptr %needle_end) #0 {
494+
; CHECK-LABEL: define ptr @ensure_not_found_successors_fixed(
495+
; CHECK-SAME: ptr [[SEARCH_START:%.*]], ptr [[SEARCH_END:%.*]], ptr [[NEEDLE_START:%.*]], ptr [[NEEDLE_END:%.*]]) #[[ATTR0]] {
496+
; CHECK-NEXT: [[ENTRY:.*:]]
497+
; CHECK-NEXT: br label %[[MEM_CHECK:.*]]
498+
; CHECK: [[MEM_CHECK]]:
499+
; CHECK-NEXT: [[SEARCH_START_INT:%.*]] = ptrtoint ptr [[SEARCH_START]] to i64
500+
; CHECK-NEXT: [[SEARCH_END_INT:%.*]] = ptrtoint ptr [[SEARCH_END]] to i64
501+
; CHECK-NEXT: [[NEEDLE_START_INT:%.*]] = ptrtoint ptr [[NEEDLE_START]] to i64
502+
; CHECK-NEXT: [[NEEDLE_END_INT:%.*]] = ptrtoint ptr [[NEEDLE_END]] to i64
503+
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 16)
504+
; CHECK-NEXT: [[SEARCH_START_PAGE:%.*]] = lshr i64 [[SEARCH_START_INT]], 12
505+
; CHECK-NEXT: [[SEARCH_END_PAGE:%.*]] = lshr i64 [[SEARCH_END_INT]], 12
506+
; CHECK-NEXT: [[NEEDLE_START_PAGE:%.*]] = lshr i64 [[NEEDLE_START_INT]], 12
507+
; CHECK-NEXT: [[NEEDLE_END_PAGE:%.*]] = lshr i64 [[NEEDLE_END_INT]], 12
508+
; CHECK-NEXT: [[SEARCH_PAGE_CMP:%.*]] = icmp ne i64 [[SEARCH_START_PAGE]], [[SEARCH_END_PAGE]]
509+
; CHECK-NEXT: [[NEEDLE_PAGE_CMP:%.*]] = icmp ne i64 [[NEEDLE_START_PAGE]], [[NEEDLE_END_PAGE]]
510+
; CHECK-NEXT: [[COMBINED_PAGE_CMP:%.*]] = or i1 [[SEARCH_PAGE_CMP]], [[NEEDLE_PAGE_CMP]]
511+
; CHECK-NEXT: br i1 [[COMBINED_PAGE_CMP]], label %[[SCALAR_PREHEADER:.*]], label %[[FIND_FIRST_VEC_HEADER:.*]], !prof [[PROF0]]
512+
; CHECK: [[FIND_FIRST_VEC_HEADER]]:
513+
; CHECK-NEXT: [[PSEARCH:%.*]] = phi ptr [ [[SEARCH_START]], %[[MEM_CHECK]] ], [ [[SEARCH_NEXT_VEC:%.*]], %[[SEARCH_CHECK_VEC:.*]] ]
514+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PSEARCH]] to i64
515+
; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
516+
; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[SEARCH_PRED]]
517+
; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[PSEARCH]], i32 1, <vscale x 16 x i1> [[SEARCH_MASKED]], <vscale x 16 x i8> zeroinitializer)
518+
; CHECK-NEXT: br label %[[MATCH_CHECK_VEC:.*]]
519+
; CHECK: [[MATCH_CHECK_VEC]]:
520+
; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC:.*]] ]
521+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
522+
; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
523+
; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[NEEDLE_PRED]]
524+
; CHECK-NEXT: [[NEEDLE_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[PNEEDLE]], i32 1, <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> zeroinitializer)
525+
; CHECK-NEXT: [[NEEDLE0:%.*]] = extractelement <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], i64 0
526+
; CHECK-NEXT: [[NEEDLE0_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[NEEDLE0]], i64 0
527+
; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[NEEDLE0_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
528+
; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], <vscale x 16 x i8> [[NEEDLE0_SPLAT]]
529+
; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[NEEDLE_SPLAT]], i64 0)
530+
; CHECK-NEXT: [[MATCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
531+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
532+
; CHECK-NEXT: br i1 [[TMP3]], label %[[CALCULATE_MATCH:.*]], label %[[NEEDLE_CHECK_VEC]]
533+
; CHECK: [[CALCULATE_MATCH]]:
534+
; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
535+
; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
536+
; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[MATCH_VEC]], i1 true)
537+
; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i8, ptr [[MATCH_START]], i64 [[MATCH_IDX]]
538+
; CHECK-NEXT: br label %[[FOUND_MATCH:.*]]
539+
; CHECK: [[NEEDLE_CHECK_VEC]]:
540+
; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
541+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
542+
; CHECK-NEXT: br i1 [[TMP4]], label %[[MATCH_CHECK_VEC]], label %[[SEARCH_CHECK_VEC]]
543+
; CHECK: [[SEARCH_CHECK_VEC]]:
544+
; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i8, ptr [[PSEARCH]], i64 16
545+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
546+
; CHECK-NEXT: br i1 [[TMP5]], label %[[FIND_FIRST_VEC_HEADER]], label %[[NOT_FOUND:.*]]
547+
; CHECK: [[SCALAR_PREHEADER]]:
548+
; CHECK-NEXT: br label %[[HEADER:.*]]
549+
; CHECK: [[HEADER]]:
550+
; CHECK-NEXT: [[SEARCH_PTR:%.*]] = phi ptr [ [[SEARCH_NEXT:%.*]], %[[SEARCH_CHECK:.*]] ], [ [[SEARCH_START]], %[[SCALAR_PREHEADER]] ]
551+
; CHECK-NEXT: [[SEARCH_LOAD:%.*]] = load i8, ptr [[SEARCH_PTR]], align 1
552+
; CHECK-NEXT: br label %[[MATCH_CHECK:.*]]
553+
; CHECK: [[NEEDLE_CHECK:.*]]:
554+
; CHECK-NEXT: [[NEEDLE_NEXT:%.*]] = getelementptr i8, ptr [[NEEDLE_PTR:%.*]], i64 1
555+
; CHECK-NEXT: [[NEEDLE_CMP:%.*]] = icmp eq ptr [[NEEDLE_NEXT]], [[NEEDLE_END]]
556+
; CHECK-NEXT: br i1 [[NEEDLE_CMP]], label %[[SEARCH_CHECK]], label %[[MATCH_CHECK]]
557+
; CHECK: [[MATCH_CHECK]]:
558+
; CHECK-NEXT: [[NEEDLE_PTR]] = phi ptr [ [[NEEDLE_START]], %[[HEADER]] ], [ [[NEEDLE_NEXT]], %[[NEEDLE_CHECK]] ]
559+
; CHECK-NEXT: [[NEEDLE_LOAD:%.*]] = load i8, ptr [[NEEDLE_PTR]], align 1
560+
; CHECK-NEXT: [[MATCH_CMP:%.*]] = icmp eq i8 [[SEARCH_LOAD]], [[NEEDLE_LOAD]]
561+
; CHECK-NEXT: br i1 [[MATCH_CMP]], label %[[FOUND_MATCH]], label %[[NEEDLE_CHECK]]
562+
; CHECK: [[SEARCH_CHECK]]:
563+
; CHECK-NEXT: [[SEARCH_NEXT]] = getelementptr i8, ptr [[SEARCH_PTR]], i64 1
564+
; CHECK-NEXT: [[SEARCH_CMP:%.*]] = icmp eq ptr [[SEARCH_NEXT]], [[SEARCH_END]]
565+
; CHECK-NEXT: br i1 [[SEARCH_CMP]], label %[[NOT_FOUND]], label %[[HEADER]]
566+
; CHECK: [[FOUND_MATCH]]:
567+
; CHECK-NEXT: [[SEARCH_PTR_LCSSA:%.*]] = phi ptr [ [[SEARCH_PTR]], %[[MATCH_CHECK]] ], [ [[MATCH_RES]], %[[CALCULATE_MATCH]] ]
568+
; CHECK-NEXT: ret ptr [[SEARCH_PTR_LCSSA]]
569+
; CHECK: [[NOT_FOUND]]:
570+
; CHECK-NEXT: [[UNUSED:%.*]] = phi i64 [ 0, %[[SEARCH_CHECK]] ], [ 0, %[[SEARCH_CHECK_VEC]] ]
571+
; CHECK-NEXT: ret ptr null
572+
;
573+
; DISABLE-LABEL: define ptr @ensure_not_found_successors_fixed(
574+
; DISABLE-SAME: ptr [[SEARCH_START:%.*]], ptr [[SEARCH_END:%.*]], ptr [[NEEDLE_START:%.*]], ptr [[NEEDLE_END:%.*]]) #[[ATTR0]] {
575+
; DISABLE-NEXT: [[ENTRY:.*]]:
576+
; DISABLE-NEXT: br label %[[HEADER:.*]]
577+
; DISABLE: [[HEADER]]:
578+
; DISABLE-NEXT: [[SEARCH_PTR:%.*]] = phi ptr [ [[SEARCH_NEXT:%.*]], %[[SEARCH_CHECK:.*]] ], [ [[SEARCH_START]], %[[ENTRY]] ]
579+
; DISABLE-NEXT: [[SEARCH_LOAD:%.*]] = load i8, ptr [[SEARCH_PTR]], align 1
580+
; DISABLE-NEXT: br label %[[MATCH_CHECK:.*]]
581+
; DISABLE: [[NEEDLE_CHECK:.*]]:
582+
; DISABLE-NEXT: [[NEEDLE_NEXT:%.*]] = getelementptr i8, ptr [[NEEDLE_PTR:%.*]], i64 1
583+
; DISABLE-NEXT: [[NEEDLE_CMP:%.*]] = icmp eq ptr [[NEEDLE_NEXT]], [[NEEDLE_END]]
584+
; DISABLE-NEXT: br i1 [[NEEDLE_CMP]], label %[[SEARCH_CHECK]], label %[[MATCH_CHECK]]
585+
; DISABLE: [[MATCH_CHECK]]:
586+
; DISABLE-NEXT: [[NEEDLE_PTR]] = phi ptr [ [[NEEDLE_START]], %[[HEADER]] ], [ [[NEEDLE_NEXT]], %[[NEEDLE_CHECK]] ]
587+
; DISABLE-NEXT: [[NEEDLE_LOAD:%.*]] = load i8, ptr [[NEEDLE_PTR]], align 1
588+
; DISABLE-NEXT: [[MATCH_CMP:%.*]] = icmp eq i8 [[SEARCH_LOAD]], [[NEEDLE_LOAD]]
589+
; DISABLE-NEXT: br i1 [[MATCH_CMP]], label %[[FOUND_MATCH:.*]], label %[[NEEDLE_CHECK]]
590+
; DISABLE: [[SEARCH_CHECK]]:
591+
; DISABLE-NEXT: [[SEARCH_NEXT]] = getelementptr i8, ptr [[SEARCH_PTR]], i64 1
592+
; DISABLE-NEXT: [[SEARCH_CMP:%.*]] = icmp eq ptr [[SEARCH_NEXT]], [[SEARCH_END]]
593+
; DISABLE-NEXT: br i1 [[SEARCH_CMP]], label %[[NOT_FOUND:.*]], label %[[HEADER]]
594+
; DISABLE: [[FOUND_MATCH]]:
595+
; DISABLE-NEXT: [[SEARCH_PTR_LCSSA:%.*]] = phi ptr [ [[SEARCH_PTR]], %[[MATCH_CHECK]] ]
596+
; DISABLE-NEXT: ret ptr [[SEARCH_PTR_LCSSA]]
597+
; DISABLE: [[NOT_FOUND]]:
598+
; DISABLE-NEXT: [[UNUSED:%.*]] = phi i64 [ 0, %[[SEARCH_CHECK]] ]
599+
; DISABLE-NEXT: ret ptr null
600+
;
601+
entry:
602+
br label %header
603+
604+
header:
605+
%search_ptr = phi ptr [ %search_next, %search_check ], [ %search_start, %entry ]
606+
%search_load = load i8, ptr %search_ptr, align 1
607+
br label %match_check
608+
609+
needle_check:
610+
%needle_next = getelementptr i8, ptr %needle_ptr, i64 1
611+
%needle_cmp = icmp eq ptr %needle_next, %needle_end
612+
br i1 %needle_cmp, label %search_check, label %match_check
613+
614+
match_check:
615+
%needle_ptr = phi ptr [ %needle_start, %header ], [ %needle_next, %needle_check ]
616+
%needle_load = load i8, ptr %needle_ptr, align 1
617+
%match_cmp = icmp eq i8 %search_load, %needle_load
618+
br i1 %match_cmp, label %found_match, label %needle_check
619+
620+
search_check:
621+
%search_next = getelementptr i8, ptr %search_ptr, i64 1
622+
%search_cmp = icmp eq ptr %search_next, %search_end
623+
br i1 %search_cmp, label %not_found, label %header
624+
625+
found_match:
626+
ret ptr %search_ptr
627+
628+
not_found:
629+
%unused = phi i64 [ 0, %search_check ]
630+
ret ptr null
631+
}
632+
493633
; From here on we only test for the presence/absence of the intrinsic.
494634
; UTC_ARGS: --disable
495635

0 commit comments

Comments
 (0)