Skip to content

Commit 041ae03

Browse files
topperc authored and
github-actions[bot] committed
Automerge: [LoopPeel] Ignore assume intrinsics for the mayWriteToMemory check in peelToTurnInvariantLoadsDereferenceable. (#171547)
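Calls to the llvm.assume intrinsic report mayWriteToMemory(), but they do not prevent the invariant load from becoming dereferenceable after peeling, so the mayWriteToMemory check in peelToTurnInvariantLoadsDereferenceable now ignores them.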
2 parents dc5853f + ccc3835 commit 041ae03

File tree

2 files changed

+91
-1
lines changed

2 files changed

+91
-1
lines changed

llvm/lib/Transforms/Utils/LoopPeel.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,10 @@ static unsigned peelToTurnInvariantLoadsDereferenceable(Loop &L,
447447
const DataLayout &DL = L.getHeader()->getDataLayout();
448448
for (BasicBlock *BB : L.blocks()) {
449449
for (Instruction &I : *BB) {
450-
if (I.mayWriteToMemory())
450+
// Don't consider llvm.assume as writing to memory.
451+
if (I.mayWriteToMemory() &&
452+
!(isa<IntrinsicInst>(I) &&
453+
cast<IntrinsicInst>(I).getIntrinsicID() == Intrinsic::assume))
451454
return 0;
452455

453456
if (LoadUsers.contains(&I))

llvm/test/Transforms/LoopUnroll/peel-to-turn-invariant-accesses-dereferenceable.ll

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,3 +622,90 @@ unreachable.exit:
622622
call void @foo()
623623
unreachable
624624
}
625+
626+
define i32 @peel_readonly_to_make_loads_derefenceable_with_assume(ptr %ptr, i32 %N, ptr %inv, i1 %c.1) {
627+
; CHECK-LABEL: @peel_readonly_to_make_loads_derefenceable_with_assume(
628+
; CHECK-NEXT: entry:
629+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
630+
; CHECK: loop.header.peel.begin:
631+
; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]]
632+
; CHECK: loop.header.peel:
633+
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN_PEEL:%.*]], label [[UNREACHABLE_EXIT1:%.*]]
634+
; CHECK: then.peel:
635+
; CHECK-NEXT: [[I_PEEL:%.*]] = load i32, ptr [[INV:%.*]], align 4
636+
; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp ugt i32 [[I_PEEL]], 0
637+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND_PEEL]])
638+
; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp ult i32 [[I_PEEL]], 2
639+
; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[LOOP_LATCH_PEEL:%.*]], label [[UNREACHABLE_EXIT1]]
640+
; CHECK: loop.latch.peel:
641+
; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i32 1
642+
; CHECK-NEXT: [[LV_PEEL:%.*]] = load i32, ptr [[GEP_PEEL]], align 4
643+
; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i32 0, [[LV_PEEL]]
644+
; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1
645+
; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000
646+
; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
647+
; CHECK: loop.header.peel.next:
648+
; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]]
649+
; CHECK: loop.header.peel.next1:
650+
; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
651+
; CHECK: entry.peel.newph:
652+
; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]]
653+
; CHECK: loop.header:
654+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
655+
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[SUM_NEXT:%.*]], [[LOOP_LATCH]] ]
656+
; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[UNREACHABLE_EXIT:%.*]]
657+
; CHECK: then:
658+
; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[INV]], align 4
659+
; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[I]], 0
660+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND]])
661+
; CHECK-NEXT: [[C_2:%.*]] = icmp ult i32 [[I]], 2
662+
; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[UNREACHABLE_EXIT]]
663+
; CHECK: loop.latch:
664+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[IV]]
665+
; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[GEP]], align 4
666+
; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
667+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
668+
; CHECK-NEXT: [[C_3:%.*]] = icmp samesign ult i32 [[IV]], 1000
669+
; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER1]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
670+
; CHECK: exit.loopexit:
671+
; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT]], [[LOOP_LATCH]] ]
672+
; CHECK-NEXT: br label [[EXIT]]
673+
; CHECK: exit:
674+
; CHECK-NEXT: [[SUM_NEXT_LCSSA1:%.*]] = phi i32 [ [[SUM_NEXT_PEEL]], [[LOOP_LATCH_PEEL]] ], [ [[SUM_NEXT_LCSSA]], [[EXIT_LOOPEXIT]] ]
675+
; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA1]]
676+
; CHECK: unreachable.exit.loopexit:
677+
; CHECK-NEXT: br label [[UNREACHABLE_EXIT1]]
678+
; CHECK: unreachable.exit:
679+
; CHECK-NEXT: call void @foo()
680+
; CHECK-NEXT: unreachable
681+
;
682+
entry:
683+
br label %loop.header
684+
685+
loop.header:
686+
%iv = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ]
687+
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop.latch ]
688+
br i1 %c.1, label %then, label %unreachable.exit
689+
690+
then:
691+
%i = load i32, ptr %inv
692+
%cond = icmp ugt i32 %i, 0
693+
call void @llvm.assume(i1 %cond)
694+
%c.2 = icmp ult i32 %i, 2
695+
br i1 %c.2, label %loop.latch, label %unreachable.exit
696+
697+
loop.latch:
698+
%gep = getelementptr i32, ptr %ptr, i32 %iv
699+
%lv = load i32, ptr %gep
700+
%sum.next = add i32 %sum, %lv
701+
%iv.next = add nuw nsw i32 %iv, 1
702+
%c.3 = icmp ult i32 %iv, 1000
703+
br i1 %c.3, label %loop.header, label %exit
704+
705+
exit:
706+
ret i32 %sum.next
707+
708+
unreachable.exit:
709+
call void @foo()
710+
unreachable
711+
}

0 commit comments

Comments
 (0)