-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[LLVM][MemCpyOpt] Unify alias tags if we optimize allocas #129537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
4c49512
5e04c39
dddcc8a
ae4139a
3ab8a71
0824f68
b9cb648
7fc13ac
2f00aa7
92584ea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1516,6 +1516,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |
| SmallVector<Instruction *, 4> LifetimeMarkers; | ||
| SmallSet<Instruction *, 4> NoAliasInstrs; | ||
| bool SrcNotDom = false; | ||
| SmallSet<Instruction *, 4> SrcAllocaInstUsers; | ||
| SmallSet<Instruction *, 4> DestAllocaInstUsers; | ||
|
|
||
| // Recursively track the user and check whether modified alias exist. | ||
| auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool { | ||
|
|
@@ -1524,8 +1526,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |
| }; | ||
|
|
||
| auto CaptureTrackingWithModRef = | ||
| [&](Instruction *AI, | ||
| function_ref<bool(Instruction *)> ModRefCallback) -> bool { | ||
| [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback, | ||
| SmallSet<Instruction *, 4> &AllocaInstUsersWithTBAA) -> bool { | ||
| SmallVector<Instruction *, 8> Worklist; | ||
| Worklist.push_back(AI); | ||
| unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking(); | ||
|
|
@@ -1569,6 +1571,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |
| continue; | ||
| } | ||
| } | ||
| if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa)) | ||
|
||
| AllocaInstUsersWithTBAA.insert(UI); | ||
| if (UI->hasMetadata(LLVMContext::MD_noalias)) | ||
| NoAliasInstrs.insert(UI); | ||
| if (!ModRefCallback(UI)) | ||
|
|
@@ -1621,7 +1625,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |
| return true; | ||
| }; | ||
|
|
||
| if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) | ||
| if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback, | ||
| DestAllocaInstUsers)) | ||
| return false; | ||
| // Bailout if Dest may have any ModRef before Store. | ||
| if (!ReachabilityWorklist.empty() && | ||
|
|
@@ -1647,7 +1652,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |
| return true; | ||
| }; | ||
|
|
||
| if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback)) | ||
| if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback, | ||
| SrcAllocaInstUsers)) | ||
| return false; | ||
|
|
||
| // We can do the transformation. First, move the SrcAlloca to the start of the | ||
|
|
@@ -1681,6 +1687,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, | |
| for (Instruction *I : NoAliasInstrs) | ||
| I->setMetadata(LLVMContext::MD_noalias, nullptr); | ||
|
|
||
| // If we merge two allocas we need to uniform alias tags as well | ||
| if (!SrcAllocaInstUsers.empty()) { | ||
| MDNode *mergeTBAA = | ||
| (*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa); | ||
| for (Instruction *It : DestAllocaInstUsers) | ||
| It->setMetadata(LLVMContext::MD_tbaa, mergeTBAA); | ||
|
||
| } | ||
|
|
||
| LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n"); | ||
| NumStackMove++; | ||
| return true; | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test still doesn't look minimal. Wouldn't it be sufficient to have a store with one tbaa tag, a memcpy and a load with another tag?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed unnecessary instructions. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s | ||
DominikAdamski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| define void @test() local_unnamed_addr { | ||
| ; CHECK-LABEL: define void @test() local_unnamed_addr { | ||
| ; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4 | ||
| ; CHECK-NEXT: br label %[[INIT_LOOP:.*]] | ||
| ; CHECK: [[INIT_LOOP]]: | ||
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INIT_LOOP]] ] | ||
| ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV]] | ||
| ; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]] | ||
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32 | ||
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[DOTPREHEADER55_PREHEADER:.*]], label %[[INIT_LOOP]] | ||
| ; CHECK: [[_PREHEADER55_PREHEADER:.*:]] | ||
| ; CHECK-NEXT: br label %[[LOOP:.*]] | ||
| ; CHECK: [[LOOP]]: | ||
| ; CHECK-NEXT: [[INDVARS_IV73:%.*]] = phi i64 [ 0, %[[DOTPREHEADER55_PREHEADER]] ], [ [[INDVARS_IV_NEXT74:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[INDVARS_IV_NEXT74]] = add nuw nsw i64 [[INDVARS_IV73]], 1 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV73]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA0]] | ||
| ; CHECK-NEXT: [[EXITCOND76_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT74]], 32 | ||
| ; CHECK-NEXT: br i1 [[EXITCOND76_NOT]], label %[[LOOP]], [[DOT_CRIT_EDGE56:label %.*]] | ||
| ; CHECK: [[__CRIT_EDGE56:.*:]] | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %test_array_a = alloca [31 x float], align 4 | ||
| %test_array_b = alloca [31 x float], align 4 | ||
| br label %init_loop | ||
|
|
||
| init_loop: | ||
| %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %init_loop ] | ||
| %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
| %1 = getelementptr float, ptr %test_array_b, i64 %indvars.iv | ||
| store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !12 | ||
| %exitcond.not = icmp eq i64 %indvars.iv.next, 32 | ||
| br i1 %exitcond.not, label %.preheader55.preheader, label %init_loop | ||
|
|
||
| .preheader55.preheader: | ||
| call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false) | ||
| br label %loop | ||
|
|
||
| loop: ; preds = %.preheader, %211 | ||
| %indvars.iv73 = phi i64 [ 0, %.preheader55.preheader ], [ %indvars.iv.next74, %loop ] | ||
| %indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1 | ||
| %2 = getelementptr float, ptr %test_array_a, i64 %indvars.iv73 | ||
| %3 = load float, ptr %2, align 4, !tbaa !31 | ||
| %exitcond76.not = icmp eq i64 %indvars.iv.next74, 32 | ||
| br i1 %exitcond76.not, label %loop, label %._crit_edge56 | ||
|
|
||
| ._crit_edge56: ; preds = %loop, %._crit_edge | ||
| ret void | ||
| } | ||
|
|
||
| ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) | ||
| declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) | ||
|
|
||
| !7 = !{!"any data access", !8, i64 0} | ||
| !8 = !{!"any access", !9, i64 0} | ||
| !9 = !{!"Flang function root test"} | ||
| !12 = !{!13, !13, i64 0} | ||
| !13 = !{!"allocated data/test_array_a", !14, i64 0} | ||
| !14 = !{!"allocated data", !7, i64 0} | ||
| !31 = !{!32, !32, i64 0} | ||
| !32 = !{!"allocated data/test_array_b", !14, i64 0} | ||
| ;. | ||
| ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} | ||
| ; CHECK: [[META1]] = !{!"allocated data/test_array_a", [[META2:![0-9]+]], i64 0} | ||
| ; CHECK: [[META2]] = !{!"allocated data", [[META3:![0-9]+]], i64 0} | ||
| ; CHECK: [[META3]] = !{!"any data access", [[META4:![0-9]+]], i64 0} | ||
| ; CHECK: [[META4]] = !{!"any access", [[META5:![0-9]+]], i64 0} | ||
| ; CHECK: [[META5]] = !{!"Flang function root test"} | ||
| ;. | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for the argument, the variable is captured.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done