Skip to content

Commit d350783

Browse files
steplong authored and tstellar committed
[LoopIdiom] Merge TBAA of adjacent stores when creating memset
Factor in the TBAA of adjacent stores, instead of just the head store, when merging stores into a memset. We were seeing GVN remove a load whose TBAA matched the second store, because GVN determined that it didn't match the TBAA of the memset; the memset carried the TBAA of only the first store.

For example, loading the field pi_ of shared_count after the memset used to create an array of shared_ptr:

    template<class T> class shared_ptr { T *p; shared_count refcount; };
    class shared_count { sp_counted_base *pi_; };

Differential Revision: https://reviews.llvm.org/D122205

(cherry picked from commit e02f497)
1 parent 198626a commit d350783

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1173,6 +1173,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
11731173
CallInst *NewCall;
11741174
if (SplatValue) {
11751175
AAMDNodes AATags = TheStore->getAAMetadata();
1176+
for (Instruction *Store : Stores)
1177+
AATags = AATags.merge(Store->getAAMetadata());
11761178
if (auto CI = dyn_cast<ConstantInt>(NumBytes))
11771179
AATags = AATags.extendTo(CI->getZExtValue());
11781180
else

llvm/test/Transforms/LoopIdiom/memset-tbaa.ll

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,45 @@ for.body:
9191
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
9292
}
9393

94+
%struct.A = type { i32*, %struct.B }
95+
%struct.B = type { i32* }
96+
97+
define dso_local void @adjacent_store_memset(%struct.A* nocapture %a, i64 %len) {
98+
; CHECK-LABEL: @adjacent_store_memset(
99+
; CHECK-NEXT: entry:
100+
; CHECK-NEXT: [[A1:%.*]] = bitcast %struct.A* [[A:%.*]] to i8*
101+
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 %len, i64 1)
102+
; CHECK-NEXT: [[LEN:%.*]] = shl nuw i64 [[UMAX]], 4
103+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A1]], i8 0, i64 [[LEN]], i1 false), !tbaa [[TBAA9:![0-9]+]]
104+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
105+
; CHECK: for.cond.cleanup:
106+
; CHECK-NEXT: ret void
107+
; CHECK: for.body:
108+
; CHECK-NEXT: [[I_09:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, %entry ]
109+
; CHECK-NEXT: %p = getelementptr inbounds %struct.A, %struct.A* [[A]], i64 [[I_09]], i32 0
110+
; CHECK-NEXT: %p2 = getelementptr inbounds %struct.A, %struct.A* [[A]], i64 [[I_09]], i32 1, i32 0
111+
; CHECK-NEXT: [[INC]] = add i64 [[I_09]], 1
112+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ult i64 [[INC]], %len
113+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
114+
;
115+
entry:
116+
br label %for.body
117+
118+
for.cond.cleanup:
119+
ret void
120+
121+
for.body:
122+
%i.09 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
123+
%p = getelementptr inbounds %struct.A, %struct.A* %a, i64 %i.09, i32 0
124+
store i32* null, i32** %p, align 8, !tbaa !18
125+
%p2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 %i.09, i32 1, i32 0
126+
store i32* null, i32** %p2, align 8, !tbaa !21
127+
%inc = add i64 %i.09, 1
128+
%cmp = icmp ult i64 %inc, %len
129+
br i1 %cmp, label %for.body, label %for.cond.cleanup
130+
}
131+
132+
94133
; CHECK: [[TBAA0]] = !{[[TBAA1:.+]], [[TBAA1]], i64 0}
95134
; CHECK: [[TBAA1]] = !{!"double", [[TBAA2:.+]], i64 0}
96135
; CHECK: [[TBAA2]] = !{!"omnipotent char", [[TBAA3:.+]], i64 0}
@@ -99,6 +138,8 @@ for.body:
99138
; CHECK: [[TBAA5]] = !{[[TBAA7:.+]], i64 32, !"_ZTS1A", [[TBAA6]], i64 0, i64 8, [[TBAA6]], i64 8, i64 8, [[TBAA6]], i64 16, i64 8, [[TBAA6]], i64 24, i64 8}
100139
; CHECK: [[TBAA7]] = !{[[TBAA3]], i64 0, !"omnipotent char"}
101140
; CHECK: [[TBAA6]] = !{[[TBAA7]], i64 8, !"double"}
141+
; CHECK: [[TBAA9]] = !{[[TBAA10:.+]], [[TBAA10]], i64 0}
142+
; CHECK: [[TBAA10]] = !{!"any pointer", [[TBAA2]], i64 0}
102143

103144
!5 = !{!6, !6, i64 0}
104145
!6 = !{!"double", !7, i64 0}
@@ -109,3 +150,9 @@ for.body:
109150
!17 = !{!15, i64 8, !"double"}
110151
!9 = !{!15, i64 32, !"_ZTS1A", !17, i64 0, i64 8, !17, i64 8, i64 8, !17, i64 16, i64 8, !17, i64 24, i64 8}
111152
!10 = !{!9, !17, i64 0, i64 1}
153+
154+
!18 = !{!19, !20, i64 0}
155+
!19 = !{!"A", !20, i64 0, !22, i64 8}
156+
!20 = !{!"any pointer", !7, i64 0}
157+
!21 = !{!22, !20, i64 0}
158+
!22 = !{!"B", !20, i64 0}

0 commit comments

Comments
 (0)