Skip to content

Commit 3e226cb

Browse files
committed
[MemProf][PGO] Prevent dropping of profile metadata during optimization
This patch fixes a couple of places where memprof-related metadata (!memprof and !callsite) was being dropped, and one place where PGO metadata (!prof) was being dropped. All were caused by calls to combineMetadata(). That function drops every metadata kind that is not in the list of known IDs provided by the client, and also drops any kind that is not handled in its switch statement. Memprof metadata needed a case in the combineMetadata switch statement. For now we simply keep the metadata of the instruction being kept (K), which doesn't retain all the profile information when two calls with memprof metadata are being combined, but at least retains some. For the memprof metadata being dropped during call CSE, add memprof and callsite metadata to the list of known IDs in combineMetadataForCSE. Neither memprof nor regular prof metadata was in the list of known IDs at the combineMetadata call in MemCpyOptimizer, which was added to combine AA metadata after optimization of byval arguments fed by memcpy instructions, and similar types of optimizations of memcpy uses. There is one other caller of combineMetadata, but it is only invoked on load instructions, which do not carry these types of metadata.
1 parent b2fd0a7 commit 3e226cb

File tree

4 files changed

+81
-5
lines changed

4 files changed

+81
-5
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,11 @@ static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
345345
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
346346
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
347347
// handled here, but combineMetadata doesn't support them yet
348-
unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
349-
LLVMContext::MD_noalias,
350-
LLVMContext::MD_invariant_group,
351-
LLVMContext::MD_access_group};
348+
unsigned KnownIDs[] = {
349+
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
350+
LLVMContext::MD_noalias, LLVMContext::MD_invariant_group,
351+
LLVMContext::MD_access_group, LLVMContext::MD_prof,
352+
LLVMContext::MD_memprof, LLVMContext::MD_callsite};
352353
combineMetadata(ReplInst, I, KnownIDs, true);
353354
}
354355

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3379,6 +3379,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
33793379
K->setMetadata(Kind,
33803380
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
33813381
break;
3382+
case LLVMContext::MD_memprof:
3383+
case LLVMContext::MD_callsite:
3384+
// Preserve !memprof and !callsite metadata on K.
3385+
break;
33823386
case LLVMContext::MD_preserve_access_index:
33833387
// Preserve !preserve.access.index in K.
33843388
break;
@@ -3442,7 +3446,9 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
34423446
LLVMContext::MD_nontemporal,
34433447
LLVMContext::MD_noundef,
34443448
LLVMContext::MD_mmra,
3445-
LLVMContext::MD_noalias_addrspace};
3449+
LLVMContext::MD_noalias_addrspace,
3450+
LLVMContext::MD_memprof,
3451+
LLVMContext::MD_callsite};
34463452
combineMetadata(K, J, KnownIDs, KDominatesJ);
34473453
}
34483454

llvm/test/Transforms/MemCpyOpt/memcpy.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,19 @@ define void @byval_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
803803
ret void
804804
}
805805

806+
define void @byval_param_profile_metadata(ptr align 4 byval(i32) %ptr) {
807+
; CHECK-LABEL: @byval_param_profile_metadata(
808+
; CHECK-NEXT: store i32 1, ptr [[PTR2:%.*]], align 4
809+
; CHECK-NEXT: call void @f_byval(ptr byval(i32) align 4 [[PTR2]]), !prof [[PROF3:![0-9]+]], !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
810+
; CHECK-NEXT: ret void
811+
;
812+
%tmp = alloca i32, align 4
813+
store i32 1, ptr %ptr
814+
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %tmp, ptr align 4 %ptr, i64 4, i1 false)
815+
call void @f_byval(ptr align 4 byval(i32) %tmp), !memprof !3, !callsite !6, !prof !7
816+
ret void
817+
}
818+
806819
define void @memcpy_memory_none(ptr %p, ptr %p2, i64 %size) {
807820
; CHECK-LABEL: @memcpy_memory_none(
808821
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P:%.*]], ptr [[P2:%.*]], i64 [[SIZE:%.*]], i1 false) #[[ATTR7:[0-9]+]]
@@ -897,3 +910,8 @@ define void @memcpy_immut_escape_after(ptr align 4 noalias %val) {
897910
!0 = !{!0}
898911
!1 = !{!1, !0}
899912
!2 = !{!1}
913+
!3 = !{!4}
914+
!4 = !{!5, !"cold"}
915+
!5 = !{i64 123, i64 456}
916+
!6 = !{i64 123}
917+
!7 = !{!"branch_weights", i32 10}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
3+
;; Test to ensure that memprof related metadata is not dropped when
4+
;; instructions are combined. Currently the metadata from the first instruction
5+
;; is kept, which prevents full loss of profile context information.
6+
7+
; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
8+
9+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
10+
target triple = "x86_64-unknown-linux-gnu"
11+
12+
define dso_local noundef nonnull ptr @_Z4testb(i1 noundef zeroext %b) local_unnamed_addr #0 {
13+
; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z4testb(
14+
; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
15+
; CHECK-NEXT: [[ENTRY:.*:]]
16+
; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
17+
; CHECK-NEXT: ret ptr [[CALL]]
18+
;
19+
entry:
20+
br i1 %b, label %if.then, label %if.else
21+
22+
if.then: ; preds = %entry
23+
%call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
24+
br label %if.end
25+
26+
if.else: ; preds = %entry
27+
%call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
28+
br label %if.end
29+
30+
if.end: ; preds = %if.else, %if.then
31+
%x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
32+
ret ptr %x.0
33+
}
34+
35+
36+
declare ptr @_Znwm(i64) nounwind readonly
37+
38+
!0 = !{!1}
39+
!1 = !{!2, !"notcold"}
40+
!2 = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
41+
!3 = !{i64 -852997907418798798}
42+
!4 = !{!5}
43+
!5 = !{!6, !"cold"}
44+
!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
45+
!7 = !{i64 123}
46+
;.
47+
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
48+
; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
49+
; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
50+
; CHECK: [[META3]] = !{i64 -852997907418798798}
51+
;.

0 commit comments

Comments
 (0)