Skip to content

Commit 4c49512

Browse files
[LLVM][MemCpyOpt] Unify alias tags if we optimize allocas
Optimizing alloca instructions may leave the affected memory accesses with invalid alias tags, and incorrect alias tags can lead to wrong optimization results. This commit unifies the alias tags when the memcpy optimization replaces two arrays with a single array.
1 parent 900220d commit 4c49512

File tree

2 files changed

+119
-4
lines changed

2 files changed

+119
-4
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,6 +1516,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15161516
SmallVector<Instruction *, 4> LifetimeMarkers;
15171517
SmallSet<Instruction *, 4> NoAliasInstrs;
15181518
bool SrcNotDom = false;
1519+
SmallSet<Instruction *, 4> SrcAllocaInstUsers;
1520+
SmallSet<Instruction *, 4> DestAllocaInstUsers;
15191521

15201522
// Recursively track the user and check whether modified alias exist.
15211523
auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
@@ -1524,8 +1526,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15241526
};
15251527

15261528
auto CaptureTrackingWithModRef =
1527-
[&](Instruction *AI,
1528-
function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1529+
[&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
1530+
SmallSet<Instruction *, 4> &AllocaInstUsersWithTBAA) -> bool {
15291531
SmallVector<Instruction *, 8> Worklist;
15301532
Worklist.push_back(AI);
15311533
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1569,6 +1571,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15691571
continue;
15701572
}
15711573
}
1574+
if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa)) {
1575+
AllocaInstUsersWithTBAA.insert(UI);
1576+
}
15721577
if (UI->hasMetadata(LLVMContext::MD_noalias))
15731578
NoAliasInstrs.insert(UI);
15741579
if (!ModRefCallback(UI))
@@ -1621,7 +1626,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16211626
return true;
16221627
};
16231628

1624-
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1629+
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
1630+
DestAllocaInstUsers))
16251631
return false;
16261632
// Bailout if Dest may have any ModRef before Store.
16271633
if (!ReachabilityWorklist.empty() &&
@@ -1647,7 +1653,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16471653
return true;
16481654
};
16491655

1650-
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1656+
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
1657+
SrcAllocaInstUsers))
16511658
return false;
16521659

16531660
// We can do the transformation. First, move the SrcAlloca to the start of the
@@ -1681,6 +1688,15 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16811688
for (Instruction *I : NoAliasInstrs)
16821689
I->setMetadata(LLVMContext::MD_noalias, nullptr);
16831690

1691+
// If we merge two allocas, we need to unify their alias tags as well.
1692+
if (!SrcAllocaInstUsers.empty()) {
1693+
MDNode *mergeTBAA =
1694+
(*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa);
1695+
for (Instruction *it : DestAllocaInstUsers) {
1696+
it->setMetadata(LLVMContext::MD_tbaa, mergeTBAA);
1697+
}
1698+
}
1699+
16841700
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
16851701
NumStackMove++;
16861702
return true;
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
2+
; The aim of this test is to check that the MemCpyOpt pass merges alias tags
3+
; after memcpy optimization
4+
5+
; ModuleID = 'FIRModule'
6+
source_filename = "FIRModule"
7+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
8+
target triple = "x86_64-unknown-linux-gnu"
9+
10+
@data_arr = internal unnamed_addr constant [31 x float] [float 0x3E68DA0CA0000000, float 0x3E692863A0000000, float 0x3E6AEF5000000000, float 0x3E6E2272C0000000, float 0x3E7271B720000000, float 0x3E777DA440000000, float 0x3E7E8C46C0000000, float 0x3E8458EFC0000000, float 0x3E8D0123C0000000, float 0x3E95E78260000000, float 0x3EA0AB7AC0000000, float 0x3EA89F4B40000000, float 0x3EB10FFB60000000, float 0x3EB5F1D140000000, float 0x3EBB435260000000, float 0x3EC0DE9700000000, float 0x3EC51B11A0000000, float 0x3ECA419FC0000000, float 0x3ED01B2B20000000, float 0x3ED3B9CEC0000000, float 0x3ED7028C40000000, float 0x3EDA60C320000000, float 0x3EDD54AD40000000, float 0x3EDF6E9F00000000, float 0x3EE130BB20000000, float 0x3EE4332400000000, float 0x3EE7575F80000000, float 0x3EE8088A60000000, float 0x3EE3B0AE60000000, float 0x3ED9BB6800000000, float 0x3ED9BB6800000000]
11+
12+
; CHECK-LABEL: @test(
13+
; CHECK: [[ARR_UNDER_TEST:%.*]] = alloca [31 x float], align 4
14+
; CHECK: store float 0x3E6AA51880000000, ptr [[ARR_UNDER_TEST]], align 4, !tbaa [[ARR_TAG:!.[0-9]+]]
15+
; CHECK-LABEL: init_loop:
16+
; CHECK: store float [[TMP0:%.*]], ptr [[TMP1:%.*]], align 4, !tbaa [[ARR_TAG]]
17+
; CHECK-LABEL: loop:
18+
; CHECK: [[TMP2:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]], i64 [[TMP3:%.*]]
19+
; CHECK: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[ARR_TAG]]
20+
define void @test(ptr captures(none) %0, ptr readonly captures(none) %1, ptr readonly captures(none) %2, ptr readonly captures(none) %3) local_unnamed_addr #0 {
21+
%5 = alloca [32 x float], align 4
22+
%6 = alloca [31 x float], align 4
23+
%7 = alloca [31 x float], align 4
24+
%8 = load i32, ptr %2, align 4, !tbaa !4
25+
%9 = sext i32 %8 to i64
26+
%10 = load i32, ptr %3, align 4, !tbaa !10
27+
%11 = add i32 %10, 1
28+
%12 = sext i32 %11 to i64
29+
%13 = sub nsw i64 %12, %9
30+
%14 = tail call i64 @llvm.smax.i64(i64 %13, i64 -1)
31+
%15 = add nsw i64 %14, 1
32+
%16 = alloca float, i64 %15, align 4
33+
store float 0x3E6AA51880000000, ptr %7, align 4, !tbaa !12
34+
br label %init_loop
35+
36+
init_loop:
37+
%19 = phi float [ 0x3E68DA0CA0000000, %4 ], [ %22, %init_loop ]
38+
%indvars.iv = phi i64 [ 2, %4 ], [ %indvars.iv.next, %init_loop ]
39+
%20 = add nsw i64 %indvars.iv, -1
40+
%21 = getelementptr float, ptr @data_arr, i64 %20
41+
%22 = load float, ptr %21, align 4, !tbaa !15
42+
%23 = fsub contract float %22, %19
43+
%33 = getelementptr float, ptr %7, i64 %20
44+
store float %23, ptr %33, align 4, !tbaa !12
45+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
46+
%exitcond.not = icmp eq i64 %indvars.iv.next, 32
47+
br i1 %exitcond.not, label %.preheader55.preheader, label %init_loop
48+
49+
.preheader55.preheader:
50+
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %6, ptr noundef nonnull align 4 dereferenceable(124) %7, i64 124, i1 false), !tbaa !22
51+
%154 = icmp sgt i64 %13, -1
52+
br i1 %154, label %loop, label %._crit_edge56
53+
54+
loop: ; preds = %.preheader, %211
55+
%indvars.iv73 = phi i64 [ 0, %.preheader55.preheader ], [ %indvars.iv.next74, %loop ]
56+
%indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1
57+
%223 = getelementptr float, ptr %6, i64 %indvars.iv73
58+
%225 = load float, ptr %223, align 4, !tbaa !31
59+
%exitcond76.not = icmp eq i64 %indvars.iv.next74, 32
60+
br i1 %exitcond76.not, label %loop, label %._crit_edge56
61+
62+
._crit_edge56: ; preds = %loop, %._crit_edge
63+
ret void
64+
}
65+
66+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
67+
declare i64 @llvm.smax.i64(i64, i64) #1
68+
69+
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
70+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #2
71+
72+
attributes #0 = { "target-cpu"="x86-64" }
73+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
74+
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
75+
76+
!llvm.module.flags = !{!0, !1, !2}
77+
!llvm.ident = !{!3}
78+
79+
!0 = !{i32 2, !"Debug Info Version", i32 3}
80+
!1 = !{i32 8, !"PIC Level", i32 2}
81+
!2 = !{i32 7, !"PIE Level", i32 2}
82+
!3 = !{!"flang version 21.0.0 (https://github.com/llvm/llvm-project.git 4d79f420ce5b5100f72f720eab2d3881f97abd0d)"}
83+
!4 = !{!5, !5, i64 0}
84+
!5 = !{!"dummy arg data/param_1", !6, i64 0}
85+
!6 = !{!"dummy arg data", !7, i64 0}
86+
!7 = !{!"any data access", !8, i64 0}
87+
!8 = !{!"any access", !9, i64 0}
88+
!9 = !{!"Flang function root test"}
89+
!10 = !{!11, !11, i64 0}
90+
!11 = !{!"dummy arg data/param_2", !6, i64 0}
91+
!12 = !{!13, !13, i64 0}
92+
!13 = !{!"allocated data/test_array_a", !14, i64 0}
93+
!14 = !{!"allocated data", !7, i64 0}
94+
!15 = !{!16, !16, i64 0}
95+
!16 = !{!"global data/data_arr", !17, i64 0}
96+
!17 = !{!"global data", !7, i64 0}
97+
!22 = !{!14, !14, i64 0}
98+
!31 = !{!32, !32, i64 0}
99+
!32 = !{!"allocated data/test_array_b", !14, i64 0}

0 commit comments

Comments
 (0)