llvm · teresajohnson · Jul 11, 2025 · Jul 9, 2025 · Jul 11, 2025 · Jul 11, 2025
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -95,6 +95,7 @@ STATISTIC(NewMergedNodes, "Number of new nodes created during merging");
 STATISTIC(NonNewMergedNodes, "Number of non new nodes used during merging");
 STATISTIC(MissingAllocForContextId,
           "Number of missing alloc nodes for context ids");
+STATISTIC(SkippedCallsCloning, "Number of calls skipped during cloning");
 
 static cl::opt<std::string> DotFilePathPrefix(
     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -5155,6 +5156,19 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
 
       assert(!isMemProfClone(*CalledFunction));
 
+      // Because we update the cloned calls by calling setCalledOperand (see
+      // comment below), out of an abundance of caution make sure the called
+      // function was actually the called operand (or its aliasee). We also
+      // strip pointer casts when looking for calls (to match behavior during
+      // summary generation), however, with opaque pointers in theory this
+      // should not be an issue. Note we still clone the current function
+      // (containing this call) above, as that could be needed for its callers.
+      auto *GA = dyn_cast_or_null<GlobalAlias>(CB->getCalledOperand());
+      if (CalledFunction != CB->getCalledOperand() &&
+          (!GA || CalledFunction != GA->getAliaseeObject())) {
+        SkippedCallsCloning++;
+        return;
+      }
       // Update the calls per the summary info.
       // Save orig name since it gets updated in the first iteration
       // below.
@@ -5173,7 +5187,13 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
           CBClone = CB;
         else
           CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
-        CBClone->setCalledFunction(NewF);
+        // Set the called operand directly instead of calling setCalledFunction,
+        // as the latter mutates the function type on the call. In rare cases
+        // we may have a slightly different type on a callee function
+        // declaration due to it being imported from a different module with
+        // incomplete types. We really just want to change the name of the
+        // function to the clone, and not make any type changes.
+        CBClone->setCalledOperand(NewF.getCallee());
         ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
                  << ore::NV("Call", CBClone) << " in clone "
                  << ore::NV("Caller", CBClone->getFunction())

diff --git a/llvm/test/ThinLTO/X86/memprof_callee_type_mismatch.ll b/llvm/test/ThinLTO/X86/memprof_callee_type_mismatch.ll
@@ -0,0 +1,62 @@
+;; Test to ensure the callite when updated to call a clone does not mutate the
+;; callee function type. In rare cases we may end up with a callee declaration
+;; that does not match the call type, because it was imported from a different
+;; module with an incomplete return type (in which case clang gives it a void
+;; return type).
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+; RUN: llvm-as src.ll -o src.o
+; RUN: llvm-as src.o.thinlto.ll -o src.o.thinlto.bc
+; RUN: opt -passes=memprof-context-disambiguation src.o -S -memprof-import-summary=src.o.thinlto.bc | FileCheck %s
+
+;--- src.ll
+; ModuleID = 'src.o'
+source_filename = "src.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main(ptr %b) {
+entry:
+  ;; This call is not changed as the summary specifies clone 0.
+  ; CHECK: call ptr @_Z3foov()
+  %call = call ptr @_Z3foov(), !callsite !5
+  ;; After changing this call to call a clone, the function type should still
+  ;; be ptr, despite the void on the callee declaration.
+  ; CHECK: call ptr @_Z3foov.memprof.1()
+  %call1 = call ptr @_Z3foov(), !callsite !6
+  %0 = load ptr, ptr %b, align 8
+  ;; Although the summary indicates this should call clone 1, and the VP
+  ;; metadata indicates the callee is _Z3foov, it is not updated because
+  ;; the ICP facility requires the function types to match.
+  ; CHECK: call ptr %0()
+  %call2 = call ptr %0(), !prof !7, !callsite !8
+  ret i32 0
+}
+
+;; Both the original callee function declaration and its clone have void return
+;; type.
+; CHECK: declare void @_Z3foov()
+; CHECK: declare void @_Z3foov.memprof.1()
+declare void @_Z3foov()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 21.0.0git ([email protected]:llvm/llvm-project.git e391301e0e4d9183fe06e69602e87b0bc889aeda)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "src.cc", directory: "", checksumkind: CSK_MD5, checksum: "8636c46e81402013b9d54e8307d2f149")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+!5 = !{i64 8632435727821051414}
+!6 = !{i64 -3421689549917153178}
+!7 = !{!"VP", i32 0, i64 4, i64 9191153033785521275, i64 4}
+!8 = !{i64 1234}
+
+;--- src.o.thinlto.ll
+; ModuleID = 'src.o.thinlto.bc'
+source_filename = "src.o.thinlto.bc"
+
+^0 = module: (path: "src.o", hash: (2823430083, 3994560862, 899296057, 1055405378, 2961356784))
+^1 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414)), (callee: null, clones: (1), stackIds: (15025054523792398438)), (callee: null, clones: (1), stackIds: (1234))))))
+^2 = flags: 353
+^3 = blockcount: 0