diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 35580f3f38c61..df2384c5f6e69 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1464,6 +1464,8 @@ class MDNode : public Metadata { static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B, const Instruction *AInstr, const Instruction *BInstr); + static MDNode *getMergedMemProfMetadata(MDNode *A, MDNode *B); + static MDNode *getMergedCallsiteMetadata(MDNode *A, MDNode *B); }; /// Tuple of metadata. diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index bbf29e6f46b47..40c448593807b 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -412,6 +412,11 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr); bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr, MemorySSAUpdater *MSSAU = nullptr); +/// DO NOT CALL EXTERNALLY. +/// FIXME: https://github.com/llvm/llvm-project/issues/121495 +/// Once external callers of this function are removed, either inline into +/// combineMetadataForCSE, or internalize and remove KnownIDs parameter. +/// /// Combine the metadata of two instructions so that K can replace J. Some /// metadata kinds can only be kept if K does not move, meaning it dominated /// J in the original IR. diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp index 1c3f589e84941..2f3c87a89f9f9 100644 --- a/llvm/lib/Analysis/MemoryProfileInfo.cpp +++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -347,3 +347,20 @@ template <> uint64_t CallStack::back() const { return mdconst::dyn_extract(N->operands().back()) ->getZExtValue(); } + +MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) { + // TODO: Support more sophisticated merging, such as selecting the one with + // more bytes allocated, or implement support for carrying multiple allocation + // leaf contexts. For now, keep the first one. + if (A) + return A; + return B; +} + +MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) { + // TODO: Support more sophisticated merging, which will require support for + // carrying multiple contexts. For now, keep the first one. + if (A) + return A; + return B; +} diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 1fcf1c570adda..272a1942c3350 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -788,6 +788,9 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) { BasicBlock *BB = std::get<0>(Incoming); Value *V = std::get<1>(Incoming); LoadInst *LI = cast(V); + // FIXME: https://github.com/llvm/llvm-project/issues/121495 + // Call combineMetadataForCSE instead, so that an explicit set of KnownIDs + // doesn't need to be maintained here. combineMetadata(NewLI, LI, KnownIDs, true); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bb98b3d1c0725..5f7cb92d239bc 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -345,10 +345,14 @@ static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, static void combineAAMetadata(Instruction *ReplInst, Instruction *I) { // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be // handled here, but combineMetadata doesn't support them yet - unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_invariant_group, - LLVMContext::MD_access_group}; + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_invariant_group, + LLVMContext::MD_access_group, LLVMContext::MD_prof, + LLVMContext::MD_memprof, LLVMContext::MD_callsite}; + // FIXME: https://github.com/llvm/llvm-project/issues/121495 + // Use custom AA metadata combining handling instead of combineMetadata, which + // is meant for CSE and will drop any metadata not in the KnownIDs list. combineMetadata(ReplInst, I, KnownIDs, true); } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index a3af96d5af026..1e4061cb0771e 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3308,6 +3308,9 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, return Changed; } +// FIXME: https://github.com/llvm/llvm-project/issues/121495 +// Once external callers of this function are removed, either inline into +// combineMetadataForCSE, or internalize and remove KnownIDs parameter. void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef KnownIDs, bool DoesKMove) { SmallVector, 4> Metadata; @@ -3320,6 +3323,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, switch (Kind) { default: + // FIXME: https://github.com/llvm/llvm-project/issues/121495 + // Change to removing only explicitly listed other metadata, and assert + // on unknown metadata, to avoid inadvertently dropping newly added + // metadata types. K->setMetadata(Kind, nullptr); // Remove unknown metadata break; case LLVMContext::MD_dbg: @@ -3379,6 +3386,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); break; + case LLVMContext::MD_memprof: + K->setMetadata(Kind, MDNode::getMergedMemProfMetadata(KMD, JMD)); + break; + case LLVMContext::MD_callsite: + K->setMetadata(Kind, MDNode::getMergedCallsiteMetadata(KMD, JMD)); + break; case LLVMContext::MD_preserve_access_index: // Preserve !preserve.access.index in K. break; @@ -3442,7 +3455,9 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, LLVMContext::MD_nontemporal, LLVMContext::MD_noundef, LLVMContext::MD_mmra, - LLVMContext::MD_noalias_addrspace}; + LLVMContext::MD_noalias_addrspace, + LLVMContext::MD_memprof, + LLVMContext::MD_callsite}; combineMetadata(K, J, KnownIDs, KDominatesJ); } diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll index 39b90adc74ef3..65d78f4199aa0 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -803,6 +803,19 @@ define void @byval_param_noalias_metadata(ptr align 4 byval(i32) %ptr) { ret void } +define void @byval_param_profile_metadata(ptr align 4 byval(i32) %ptr) { +; CHECK-LABEL: @byval_param_profile_metadata( +; CHECK-NEXT: store i32 1, ptr [[PTR2:%.*]], align 4 +; CHECK-NEXT: call void @f_byval(ptr byval(i32) align 4 [[PTR2]]), !prof [[PROF3:![0-9]+]], !memprof [[META4:![0-9]+]], !callsite [[META7:![0-9]+]] +; CHECK-NEXT: ret void +; + %tmp = alloca i32, align 4 + store i32 1, ptr %ptr + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %tmp, ptr align 4 %ptr, i64 4, i1 false) + call void @f_byval(ptr align 4 byval(i32) %tmp), !memprof !3, !callsite !6, !prof !7 + ret void +} + define void @memcpy_memory_none(ptr %p, ptr %p2, i64 %size) { ; CHECK-LABEL: @memcpy_memory_none( ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[P:%.*]], ptr [[P2:%.*]], i64 [[SIZE:%.*]], i1 false) #[[ATTR7:[0-9]+]] @@ -897,3 +910,8 @@ define void @memcpy_immut_escape_after(ptr align 4 noalias %val) { !0 = !{!0} !1 = !{!1, !0} !2 = !{!1} +!3 = !{!4} +!4 = !{!5, !"cold"} +!5 = !{i64 123, i64 456} +!6 = !{i64 123} +!7 = !{!"branch_weights", i32 10} diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll new file mode 100644 index 0000000000000..10c6aeb26ba76 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-memprof.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 + +;; Test to ensure that memprof related metadata is not dropped when +;; instructions are combined. Currently the metadata from the first instruction +;; is kept, which prevents full loss of profile context information. + +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local noundef nonnull ptr @_Z4testb(i1 noundef zeroext %b) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local noundef nonnull ptr @_Z4testb( +; CHECK-SAME: i1 noundef zeroext [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof [[META0:![0-9]+]], !callsite [[META3:![0-9]+]] +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + br i1 %b, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !0, !callsite !3 + br label %if.end + +if.else: ; preds = %entry + %call1 = call noalias noundef nonnull dereferenceable(4) ptr @_Znwm(i64 noundef 4), !memprof !4, !callsite !7 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ] + ret ptr %x.0 +} + + +declare ptr @_Znwm(i64) nounwind readonly + +!0 = !{!1} +!1 = !{!2, !"notcold"} +!2 = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434} +!3 = !{i64 -852997907418798798} +!4 = !{!5} +!5 = !{!6, !"cold"} +!6 = !{i64 123, i64 -2101080423462424381, i64 5188446645037944434} +!7 = !{i64 123} +;. +; CHECK: [[META0]] = !{[[META1:![0-9]+]]} +; CHECK: [[META1]] = !{[[META2:![0-9]+]], !"notcold"} +; CHECK: [[META2]] = !{i64 -852997907418798798, i64 -2101080423462424381, i64 5188446645037944434} +; CHECK: [[META3]] = !{i64 -852997907418798798} +;.