Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/Metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,8 @@ class MDNode : public Metadata {
static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B,
const Instruction *AInstr,
const Instruction *BInstr);
static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B);
static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B);
};

/// Tuple of metadata.
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Utils/Local.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,11 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
MemorySSAUpdater *MSSAU = nullptr);

/// DO NOT CALL EXTERNALLY.
/// FIXME: https://github.com/llvm/llvm-project/issues/121495
/// Once external callers of this function are removed, either inline into
/// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
///
/// Combine the metadata of two instructions so that K can replace J. Some
/// metadata kinds can only be kept if K does not move, meaning it dominated
/// J in the original IR.
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Analysis/MemoryProfileInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,24 @@ template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
return mdconst::dyn_extract<ConstantInt>(N->operands().back())
->getZExtValue();
}

MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
if (!(A && B)) {
return A ? A : B;
}

// TODO: Support more sophisticated merging, such as selecting the one with
// more bytes allocated, or implement support for carrying multiple allocation
// leaf contexts. For now, keep the first one.
return A;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic is a little hard to read. Can this be replaced with --

if(A) return A;
return B;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
if (!(A && B)) {
return A ? A : B;
}

// TODO: Support more sophisticated merging, which will require support for
// carrying multiple contexts. For now, keep the first one.
return A;
}
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
BasicBlock *BB = std::get<0>(Incoming);
Value *V = std::get<1>(Incoming);
LoadInst *LI = cast<LoadInst>(V);
// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Call combineMetadataForCSE instead, so that an explicit set of KnownIDs
// doesn't need to be maintained here.
combineMetadata(NewLI, LI, KnownIDs, true);
Value *NewInVal = LI->getOperand(0);
if (NewInVal != InVal)
Expand Down
12 changes: 8 additions & 4 deletions llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,14 @@ static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias,
LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group};
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_invariant_group,
LLVMContext::MD_access_group, LLVMContext::MD_prof,
LLVMContext::MD_memprof, LLVMContext::MD_callsite};
// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Use custom AA metadata combining handling instead of combineMetadata, which
// is meant for CSE and will drop any metadata not in the KnownIDs list.
combineMetadata(ReplInst, I, KnownIDs, true);
}

Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Transforms/Utils/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3308,6 +3308,9 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
return Changed;
}

// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Once external callers of this function are removed, either inline into
// combineMetadataForCSE, or internalize and remove KnownIDs parameter.
void llvm::combineMetadata(Instruction *K, const Instruction *J,
ArrayRef<unsigned> KnownIDs, bool DoesKMove) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
Expand All @@ -3320,6 +3323,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,

switch (Kind) {
default:
// FIXME: https://github.com/llvm/llvm-project/issues/121495
// Change to removing only explicitly listed other metadata, and assert
// on unknown metadata, to avoid inadvertently dropping newly added
// metadata types.
K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
Expand Down Expand Up @@ -3379,6 +3386,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
case LLVMContext::MD_memprof:
K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD));
break;
case LLVMContext::MD_callsite:
K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD));
break;
case LLVMContext::MD_preserve_access_index:
// Preserve !preserve.access.index in K.
break;
Expand Down Expand Up @@ -3442,7 +3455,9 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
LLVMContext::MD_nontemporal,
LLVMContext::MD_noundef,
LLVMContext::MD_mmra,
LLVMContext::MD_noalias_addrspace};
LLVMContext::MD_noalias_addrspace,
LLVMContext::MD_memprof,
LLVMContext::MD_callsite};
combineMetadata(K, J, KnownIDs, KDominatesJ);
}

Expand Down
18 changes: 18 additions & 0 deletions llvm/test/Transforms/MemCpyOpt/memcpy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,19 @@ define void @byval_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
ret void
}

define void @byval_param_profile_metadata(ptr align 4 byval(i32) %ptr) {
; CHECK-LABEL: @byval_param_profile_metadata(
; CHECK-NEXT: store i32 1, ptr [[PTR2:%.*]], align 4
; CHECK-NEXT: call void @f_byval(ptr byval(i32) align 4 [[PTR2]]), !prof [[PROF3:![0-9]+]], !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]]
; CHECK-NEXT: ret void
;
%tmp = alloca i32, align 4
store i32 1, ptr %ptr
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %tmp, ptr align 4 %ptr, i64 4, i1 false)
call void @f_byval(ptr align 4 byval(i32) %tmp), !memprof !3, !callsite !6, !prof !7
ret void
}

define void @memcpy_memory_none(ptr %p, ptr %p2, i64 %size) {
; CHECK-LABEL: @memcpy_memory_none(
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P:%.*]], ptr [[P2:%.*]], i64 [[SIZE:%.*]], i1 false) #[[ATTR7:[0-9]+]]
Expand Down Expand Up @@ -897,3 +910,8 @@ define void @memcpy_immut_escape_after(ptr align 4 noalias %val) {
!0 = !{!0}
!1 = !{!1, !0}
!2 = !{!1}
!3 = !{!4}
!4 = !{!5, !"cold"}
!5 = !{i64 123, i64 456}
!6 = !{i64 123}
!7 = !{!"branch_weights", i32 10}
51 changes: 51 additions & 0 deletions llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

;; Test to ensure that memprof related metadata is not dropped when
;; instructions are combined. Currently the metadata from the first instruction
;; is kept, which prevents full loss of profile context information.

; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define dso_local noundef nonnull ptr @_Z4testb(i1 noundef zeroext %b) local_unnamed_addr #0 {
; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z4testb(
; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]]
; CHECK-NEXT: ret ptr [[CALL]]
;
entry:
br i1 %b, label %if.then, label %if.else

if.then: ; preds = %entry
%call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3
br label %if.end

if.else: ; preds = %entry
%call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7
br label %if.end

if.end: ; preds = %if.else, %if.then
%x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
ret ptr %x.0
}


declare ptr @_Znwm(i64) nounwind readonly

!0 = !{!1}
!1 = !{!2, !"notcold"}
!2 = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
!3 = !{i64 -852997907418798798}
!4 = !{!5}
!5 = !{!6, !"cold"}
!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434}
!7 = !{i64 123}
;.
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"}
; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434}
; CHECK: [[META3]] = !{i64 -852997907418798798}
;.
Loading