Skip to content

Commit b75bf75

Browse files
efriedma-quic authored and tstellar committed
[LoopIdiom] Fix bailout for aliasing in memcpy transform.
Commit dd5991c modified the aliasing checks here to allow transforming a memcpy where the source and destination point into the same object. However, the change accidentally made the code skip the alias check for other operations in the loop.

Instead of completely skipping the alias check, just skip the check for whether the memcpy aliases itself.

Differential Revision: https://reviews.llvm.org/D126486

(cherry picked from commit abdf0da)
1 parent 2e857fe commit b75bf75

File tree

2 files changed

+53
-17
lines changed

2 files changed

+53
-17
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1420,26 +1420,19 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
14201420

14211421
// If the store is a memcpy instruction, we must check if it will write to
14221422
// the load memory locations. So remove it from the ignored stores.
1423-
if (IsMemCpy)
1424-
IgnoredInsts.erase(TheStore);
14251423
MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
1424+
if (IsMemCpy && !Verifier.IsSameObject)
1425+
IgnoredInsts.erase(TheStore);
14261426
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
14271427
StoreSizeSCEV, *AA, IgnoredInsts)) {
1428-
if (!IsMemCpy) {
1429-
ORE.emit([&]() {
1430-
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
1431-
TheLoad)
1432-
<< ore::NV("Inst", InstRemark) << " in "
1433-
<< ore::NV("Function", TheStore->getFunction())
1434-
<< " function will not be hoisted: "
1435-
<< ore::NV("Reason", "The loop may access load location");
1436-
});
1437-
return Changed;
1438-
}
1439-
// At this point loop may access load only for memcpy in same underlying
1440-
// object. If that's not the case bail out.
1441-
if (!Verifier.IsSameObject)
1442-
return Changed;
1428+
ORE.emit([&]() {
1429+
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
1430+
<< ore::NV("Inst", InstRemark) << " in "
1431+
<< ore::NV("Function", TheStore->getFunction())
1432+
<< " function will not be hoisted: "
1433+
<< ore::NV("Reason", "The loop may access load location");
1434+
});
1435+
return Changed;
14431436
}
14441437

14451438
bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;

llvm/test/Transforms/LoopIdiom/basic.ll

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1530,6 +1530,49 @@ for.body: ; preds = %entry, %for.body
15301530
br i1 %cmp, label %for.body, label %for.cond.cleanup
15311531
}
15321532

1533+
; Do not form memmove when there's an aliasing operation, even
1534+
; if the memcpy source and destination are in the same object.
1535+
define void @do_not_form_memmove8(i64* %p) {
1536+
; CHECK-LABEL: @do_not_form_memmove8(
1537+
; CHECK-NEXT: entry:
1538+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 1000
1539+
; CHECK-NEXT: br label [[LOOP:%.*]]
1540+
; CHECK: exit:
1541+
; CHECK-NEXT: ret void
1542+
; CHECK: loop:
1543+
; CHECK-NEXT: [[X4:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X13:%.*]], [[LOOP]] ]
1544+
; CHECK-NEXT: [[X5:%.*]] = zext i32 [[X4]] to i64
1545+
; CHECK-NEXT: [[X7:%.*]] = getelementptr inbounds i64, i64* [[P2]], i64 [[X5]]
1546+
; CHECK-NEXT: [[X8:%.*]] = bitcast i64* [[X7]] to i8*
1547+
; CHECK-NEXT: store i64 1, i64* [[X7]], align 4
1548+
; CHECK-NEXT: [[X11:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[X5]]
1549+
; CHECK-NEXT: [[X12:%.*]] = bitcast i64* [[X11]] to i8*
1550+
; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[X12]], i8* [[X8]], i64 8, i1 false)
1551+
; CHECK-NEXT: [[X13]] = add i32 [[X4]], 1
1552+
; CHECK-NEXT: [[X14:%.*]] = icmp eq i32 [[X13]], 44
1553+
; CHECK-NEXT: br i1 [[X14]], label [[EXIT:%.*]], label [[LOOP]]
1554+
;
1555+
entry:
1556+
%p2 = getelementptr inbounds i64, i64* %p, i64 1000
1557+
br label %loop
1558+
1559+
exit:
1560+
ret void
1561+
1562+
loop:
1563+
%x4 = phi i32 [ 0, %entry ], [ %x13, %loop ]
1564+
%x5 = zext i32 %x4 to i64
1565+
%x7 = getelementptr inbounds i64, i64* %p2, i64 %x5
1566+
%x8 = bitcast i64* %x7 to i8*
1567+
store i64 1, i64* %x7, align 4
1568+
%x11 = getelementptr inbounds i64, i64* %p, i64 %x5
1569+
%x12 = bitcast i64* %x11 to i8*
1570+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x12, i8* %x8, i64 8, i1 false)
1571+
%x13 = add i32 %x4, 1
1572+
%x14 = icmp eq i32 %x13, 44
1573+
br i1 %x14, label %exit, label %loop
1574+
}
1575+
15331576
;; Memcpy formation is still preferred over memmove.
15341577
define void @prefer_memcpy_over_memmove(i8* noalias %Src, i8* noalias %Dest, i64 %Size) {
15351578
; CHECK-LABEL: @prefer_memcpy_over_memmove(

0 commit comments

Comments
 (0)