Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
SmallVector<Instruction *, 4> LifetimeMarkers;
SmallSet<Instruction *, 4> NoAliasInstrs;
bool SrcNotDom = false;
SmallSet<Instruction *, 4> SrcAllocaInstUsers;
SmallSet<Instruction *, 4> DestAllocaInstUsers;

// Recursively track the user and check whether modified alias exist.
auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
Expand All @@ -1524,8 +1526,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
};

auto CaptureTrackingWithModRef =
[&](Instruction *AI,
function_ref<bool(Instruction *)> ModRefCallback) -> bool {
[&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
SmallSet<Instruction *, 4> &AllocaInstUsersWithTBAA) -> bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for the argument, the variable is captured.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

SmallVector<Instruction *, 8> Worklist;
Worklist.push_back(AI);
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
Expand Down Expand Up @@ -1569,6 +1571,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
}
}
if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about MD_tbaa_struct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added MD_tbaa_struct .

AllocaInstUsersWithTBAA.insert(UI);
if (UI->hasMetadata(LLVMContext::MD_noalias))
NoAliasInstrs.insert(UI);
if (!ModRefCallback(UI))
Expand Down Expand Up @@ -1621,7 +1625,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};

if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
DestAllocaInstUsers))
return false;
// Bailout if Dest may have any ModRef before Store.
if (!ReachabilityWorklist.empty() &&
Expand All @@ -1647,7 +1652,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};

if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
SrcAllocaInstUsers))
return false;

// We can do the transformation. First, move the SrcAlloca to the start of the
Expand Down Expand Up @@ -1681,6 +1687,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
for (Instruction *I : NoAliasInstrs)
I->setMetadata(LLVMContext::MD_noalias, nullptr);

// If we merge two allocas we need to uniform alias tags as well
if (!SrcAllocaInstUsers.empty()) {
MDNode *mergeTBAA =
(*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa);
for (Instruction *It : DestAllocaInstUsers)
It->setMetadata(LLVMContext::MD_tbaa, mergeTBAA);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't look safe? You are just assigning one tag to all the accesses now. There may be accesses with different tags, and replacing them with a different one would be a miscompile.

The easy fix here would be to just drop all the tbaa data like is done for noalias metadata. Otherwise you need to do by-offset analysis of accesses and merge the tbaa tags for specific offsets.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In any case, we should have a test case where multiple tbaa tags are involved at different offsets.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dropped tbaa metadata and I added another test.

}

LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
NumStackMove++;
return true;
Expand Down
73 changes: 73 additions & 0 deletions llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test still doesn't look minimal. Wouldn't it be sufficient to have a store with one tbaa tag, a memcpy and a load with another tag?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed unnecessary instructions.

Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s

define void @test() local_unnamed_addr {
; CHECK-LABEL: define void @test() local_unnamed_addr {
; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
; CHECK-NEXT: br label %[[INIT_LOOP:.*]]
; CHECK: [[INIT_LOOP]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INIT_LOOP]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[DOTPREHEADER55_PREHEADER:.*]], label %[[INIT_LOOP]]
; CHECK: [[_PREHEADER55_PREHEADER:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[INDVARS_IV73:%.*]] = phi i64 [ 0, %[[DOTPREHEADER55_PREHEADER]] ], [ [[INDVARS_IV_NEXT74:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT74]] = add nuw nsw i64 [[INDVARS_IV73]], 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV73]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[EXITCOND76_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT74]], 32
; CHECK-NEXT: br i1 [[EXITCOND76_NOT]], label %[[LOOP]], [[DOT_CRIT_EDGE56:label %.*]]
; CHECK: [[__CRIT_EDGE56:.*:]]
; CHECK-NEXT: ret void
;
%test_array_a = alloca [31 x float], align 4
%test_array_b = alloca [31 x float], align 4
br label %init_loop

init_loop:
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %init_loop ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%1 = getelementptr float, ptr %test_array_b, i64 %indvars.iv
store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !12
%exitcond.not = icmp eq i64 %indvars.iv.next, 32
br i1 %exitcond.not, label %.preheader55.preheader, label %init_loop

.preheader55.preheader:
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
br label %loop

loop: ; preds = %.preheader, %211
%indvars.iv73 = phi i64 [ 0, %.preheader55.preheader ], [ %indvars.iv.next74, %loop ]
%indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1
%2 = getelementptr float, ptr %test_array_a, i64 %indvars.iv73
%3 = load float, ptr %2, align 4, !tbaa !31
%exitcond76.not = icmp eq i64 %indvars.iv.next74, 32
br i1 %exitcond76.not, label %loop, label %._crit_edge56

._crit_edge56: ; preds = %loop, %._crit_edge
ret void
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg)

!7 = !{!"any data access", !8, i64 0}
!8 = !{!"any access", !9, i64 0}
!9 = !{!"Flang function root test"}
!12 = !{!13, !13, i64 0}
!13 = !{!"allocated data/test_array_a", !14, i64 0}
!14 = !{!"allocated data", !7, i64 0}
!31 = !{!32, !32, i64 0}
!32 = !{!"allocated data/test_array_b", !14, i64 0}
;.
; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"allocated data/test_array_a", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"allocated data", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"any data access", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"any access", [[META5:![0-9]+]], i64 0}
; CHECK: [[META5]] = !{!"Flang function root test"}
;.