llvm · hassnaaHamdi · Sep 23, 2025 · Sep 24, 2025 · Nov 19, 2025 · Nov 19, 2025
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -343,6 +343,7 @@ Modified Compiler Flags
 -----------------------
 - The `-gkey-instructions` compiler flag is now enabled by default when DWARF is emitted for plain C/C++ and optimizations are enabled. (#GH149509)
 - The `-fconstexpr-steps` compiler flag now accepts value `0` to opt out of this limit. (#GH160440)
+- The `-fdevirtualize-speculatively` compiler flag is now supported to enable speculative devirtualization of virtual function calls, it's disabled by default. (#GH159685)
 
 Removed Compiler Flags
 -------------------------

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
@@ -2352,6 +2352,56 @@ are listed below.
    pure ThinLTO, as all split regular LTO modules are merged and LTO linked
    with regular LTO.
 
+.. option:: -fdevirtualize-speculatively
+
+   Enable speculative devirtualization optimization where a virtual call
+   can be transformed into a direct call under the assumption that its
+   object is of a particular type. A runtime check is inserted to validate
+   the assumption before making the direct call, and if the check fails,
+   the original virtual call is made instead. This optimization can enable
+   more inlining opportunities and better optimization of the direct call.
+   This is different from whole program devirtualization optimization
+   that rely on global analysis and hidden visibility of the objects to prove
+   that the object is always of a particular type at a virtual call site.
+   This optimization doesn't require global analysis or hidden visibility.
+   This optimization doesn't devirtualize all virtual calls, but only
+   when there's a single implementation of the virtual function in the module.
+   There could be a single implementation of the virtual function
+   either because the function is not overridden in any derived class,
+   or because all objects are instances of the same class/type.
+
+   Ex of IR before the optimization:
+
+   .. code-block:: llvm
+
+     %vtable = load ptr, ptr %BV, align 8, !tbaa !6
+     %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS4Base")
+     tail call void @llvm.assume(i1 %0)
+     %0 = load ptr, ptr %vtable, align 8
+     tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV)
+     ret void
+
+   IR after the optimization:
+
+   .. code-block:: llvm
+
+     %vtable = load ptr, ptr %BV, align 8, !tbaa !12
+     %0 = load ptr, ptr %vtable, align 8
+     %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev
+     br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !15
+     if.true.direct_targ:                              ; preds = %entry
+       tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 8 dereferenceable(8) %BV)
+       br label %if.end.icp
+     if.false.orig_indirect:                           ; preds = %entry
+       tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV)
+       br label %if.end.icp
+     if.end.icp:                                       ; preds = %if.false.orig_indirect, %if.true.direct_targ
+       ret void
+
+   This feature is temporarily ignored at the LLVM side when LTO is enabled.
+   TODO: Update the comment when the LLVM side supports this feature for LTO.
+   This feature is turned off by default.
+
 .. option:: -f[no-]unique-source-file-names
 
    When enabled, allows the compiler to assume that each object file
@@ -5216,6 +5266,8 @@ Execute ``clang-cl /?`` to see a list of supported options:
       -fstandalone-debug      Emit full debug info for all types used by the program
       -fstrict-aliasing	      Enable optimizations based on strict aliasing rules
       -fsyntax-only           Run the preprocessor, parser and semantic analysis stages
+      -fdevirtualize-speculatively
+                              Enables speculative devirtualization optimization.
       -fwhole-program-vtables Enables whole-program vtable optimization. Requires -flto
       -gcodeview-ghash        Emit type record hashes in a .debug$H section
       -gcodeview              Generate CodeView debug information

diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
@@ -364,6 +364,8 @@ VALUE_CODEGENOPT(WarnStackSize     , 32, UINT_MAX, Benign) ///< Set via -fwarn-s
 CODEGENOPT(NoStackArgProbe, 1, 0, Benign) ///< Set when -mno-stack-arg-probe is used
 CODEGENOPT(EmitLLVMUseLists, 1, 0, Benign) ///< Control whether to serialize use-lists.
 
+CODEGENOPT(DevirtualizeSpeculatively, 1, 0, Benign) ///< Whether to apply the speculative
+                                                    /// devirtualization optimization.
 CODEGENOPT(WholeProgramVTables, 1, 0, Benign) ///< Whether to apply whole-program
                                               ///  vtable optimization.
 

diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
@@ -4512,6 +4512,13 @@ defm new_infallible : BoolFOption<"new-infallible",
   BothFlags<[], [ClangOption, CC1Option],
           " treating throwing global C++ operator new as always returning valid memory "
   "(annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.">>;
+defm devirtualize_speculatively
+    : BoolFOption<"devirtualize-speculatively",
+                  CodeGenOpts<"DevirtualizeSpeculatively">, DefaultFalse,
+                  PosFlag<SetTrue, [], [],
+                          "Enables speculative devirtualization optimization.">,
+                  NegFlag<SetFalse>,
+                  BothFlags<[], [ClangOption, CLOption, CC1Option]>>;
 defm whole_program_vtables : BoolFOption<"whole-program-vtables",
   CodeGenOpts<"WholeProgramVTables">, DefaultFalse,
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
@@ -7122,9 +7129,8 @@ defm variable_expansion_in_unroller : BooleanFFlag<"variable-expansion-in-unroll
     Group<clang_ignored_gcc_optimization_f_Group>;
 defm web : BooleanFFlag<"web">, Group<clang_ignored_gcc_optimization_f_Group>;
 defm whole_program : BooleanFFlag<"whole-program">, Group<clang_ignored_gcc_optimization_f_Group>;
-defm devirtualize : BooleanFFlag<"devirtualize">, Group<clang_ignored_gcc_optimization_f_Group>;
-defm devirtualize_speculatively : BooleanFFlag<"devirtualize-speculatively">,
-    Group<clang_ignored_gcc_optimization_f_Group>;
+defm devirtualize : BooleanFFlag<"devirtualize">,
+                    Group<clang_ignored_gcc_optimization_f_Group>;
 
 // Generic gfortran options.
 def A_DASH : Joined<["-"], "A-">, Group<gfortran_Group>;

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
@@ -940,6 +940,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   // non-integrated assemblers don't recognize .cgprofile section.
   PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
   PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO;
+  PTO.DevirtualizeSpeculatively = CodeGenOpts.DevirtualizeSpeculatively;
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;

diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
@@ -2827,10 +2827,15 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
                                                    SourceLocation Loc) {
   if (SanOpts.has(SanitizerKind::CFIVCall))
     EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc);
-  else if (CGM.getCodeGenOpts().WholeProgramVTables &&
-           // Don't insert type test assumes if we are forcing public
-           // visibility.
-           !CGM.AlwaysHasLTOVisibilityPublic(RD)) {
+  // Emit the intrinsics of (type_test and assume) for the features of WPD and
+  // speculative devirtualization. For WPD, emit the intrinsics only for the
+  // case of non_public LTO visibility.
+  // TODO: refactor this condition and similar ones into a function (e.g.,
+  // ShouldEmitDevirtualizationMD) to encapsulate the details of the different
+  // types of devirtualization.
+  else if ((CGM.getCodeGenOpts().WholeProgramVTables &&
+            !CGM.AlwaysHasLTOVisibilityPublic(RD)) ||
+           CGM.getCodeGenOpts().DevirtualizeSpeculatively) {
     CanQualType Ty = CGM.getContext().getCanonicalTagType(RD);
     llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(Ty);
     llvm::Value *TypeId =
@@ -2988,8 +2993,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
 }
 
 bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) {
-  if (!CGM.getCodeGenOpts().WholeProgramVTables ||
-      !CGM.HasHiddenLTOVisibility(RD))
+  if ((!CGM.getCodeGenOpts().WholeProgramVTables ||
+       !CGM.HasHiddenLTOVisibility(RD)) &&
+      !CGM.getCodeGenOpts().DevirtualizeSpeculatively)
     return false;
 
   if (CGM.getCodeGenOpts().VirtualFunctionElimination)

diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
@@ -1363,10 +1363,12 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel(
 void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
                                            llvm::GlobalVariable *VTable,
                                            const VTableLayout &VTLayout) {
-  // Emit type metadata on vtables with LTO or IR instrumentation.
+  // Emit type metadata on vtables with LTO or IR instrumentation or
+  // speculative devirtualization.
   // In IR instrumentation, the type metadata is used to find out vtable
   // definitions (for type profiling) among all global variables.
-  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr())
+  if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() &&
+      !getCodeGenOpts().DevirtualizeSpeculatively)
     return;
 
   CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());

diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -716,10 +716,14 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
 
   bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
                            CGM.HasHiddenLTOVisibility(RD);
+  // TODO: Update this name not to be restricted to WPD only
+  // as we now emit the vtable info info for speculative devirtualization as
+  // well.
   bool ShouldEmitWPDInfo =
-      CGM.getCodeGenOpts().WholeProgramVTables &&
-      // Don't insert type tests if we are forcing public visibility.
-      !CGM.AlwaysHasLTOVisibilityPublic(RD);
+      (CGM.getCodeGenOpts().WholeProgramVTables &&
+       // Don't insert type tests if we are forcing public visibility.
+       !CGM.AlwaysHasLTOVisibilityPublic(RD)) ||
+      CGM.getCodeGenOpts().DevirtualizeSpeculatively;
   llvm::Value *VirtualFn = nullptr;
 
   {
@@ -2110,17 +2114,20 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
 
   // Always emit type metadata on non-available_externally definitions, and on
   // available_externally definitions if we are performing whole program
-  // devirtualization. For WPD we need the type metadata on all vtable
-  // definitions to ensure we associate derived classes with base classes
-  // defined in headers but with a strong definition only in a shared library.
+  // devirtualization or speculative devirtualization. We need the type metadata
+  // on all vtable definitions to ensure we associate derived classes with base
+  // classes defined in headers but with a strong definition only in a shared
+  // library.
   if (!VTable->isDeclarationForLinker() ||
-      CGM.getCodeGenOpts().WholeProgramVTables) {
+      CGM.getCodeGenOpts().WholeProgramVTables ||
+      CGM.getCodeGenOpts().DevirtualizeSpeculatively) {
     CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout);
     // For available_externally definitions, add the vtable to
     // @llvm.compiler.used so that it isn't deleted before whole program
     // analysis.
     if (VTable->isDeclarationForLinker()) {
-      assert(CGM.getCodeGenOpts().WholeProgramVTables);
+      assert(CGM.getCodeGenOpts().WholeProgramVTables ||
+             CGM.getCodeGenOpts().DevirtualizeSpeculatively);
       CGM.addCompilerUsedGlobal(VTable);
     }
   }

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7745,6 +7745,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs);
 
+  if (Args.hasFlag(options::OPT_fdevirtualize_speculatively,
+                   options::OPT_fno_devirtualize_speculatively,
+                   /*Default value*/ false))
+    CmdArgs.push_back("-fdevirtualize-speculatively");
+
   bool VirtualFunctionElimination =
       Args.hasFlag(options::OPT_fvirtual_function_elimination,
                    options::OPT_fno_virtual_function_elimination, false);

diff --git a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp
@@ -0,0 +1,78 @@
+// Test that Clang emits vtable metadata when speculative devirtualization is enabled.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=CHECK  %s
+
+struct A {
+  A();
+  virtual void f();
+};
+
+struct B : virtual A {
+  B();
+  virtual void g();
+  virtual void h();
+};
+
+namespace {
+
+struct D : B {
+  D();
+  virtual void f();
+  virtual void h();
+};
+
+}
+
+A::A() {}
+B::B() {}
+D::D() {}
+
+void A::f() {
+}
+
+void B::g() {
+}
+
+void D::f() {
+}
+
+void D::h() {
+}
+
+void af(A *a) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A")
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  a->f();
+}
+
+void dg1(D *d) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B")
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  d->g();
+}
+
+void df1(D *d) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11)
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  d->f();
+}
+
+void dh1(D *d) {
+  // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11)
+  // CHECK-NEXT: call void @llvm.assume(i1 [[P]])
+  d->h();
+}
+
+
+D d;
+
+void foo() {
+  dg1(&d);
+  df1(&d);
+  dh1(&d);
+
+
+  struct FA : A {
+    void f() {}
+  } fa;
+  af(&fa);
+}
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
@@ -377,7 +377,6 @@
 // RUN: -ftree-ter                                                            \
 // RUN: -ftree-vrp                                                            \
 // RUN: -fno-devirtualize                                                     \
-// RUN: -fno-devirtualize-speculatively                                       \
 // RUN: -fslp-vectorize-aggressive                                            \
 // RUN: -fno-slp-vectorize-aggressive                                         \
 // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-WARNING %s
@@ -436,7 +435,6 @@
 // CHECK-WARNING-DAG: optimization flag '-ftree-ter' is not supported
 // CHECK-WARNING-DAG: optimization flag '-ftree-vrp' is not supported
 // CHECK-WARNING-DAG: optimization flag '-fno-devirtualize' is not supported
-// CHECK-WARNING-DAG: optimization flag '-fno-devirtualize-speculatively' is not supported
 // CHECK-WARNING-DAG: the flag '-fslp-vectorize-aggressive' has been deprecated and will be ignored
 // CHECK-WARNING-DAG: the flag '-fno-slp-vectorize-aggressive' has been deprecated and will be ignored
 

diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
@@ -99,6 +99,10 @@ class PipelineTuningOptions {
   // analyses after various module->function or cgscc->function adaptors in the
   // default pipelines.
   bool EagerlyInvalidateAnalyses;
+
+  // Tuning option to enable/disable speculative devirtualization.
+  // Its default value is false.
+  bool DevirtualizeSpeculatively;
 };
 
 /// This class provides access to building LLVM's passes.

diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -226,11 +226,14 @@ struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
   ModuleSummaryIndex *ExportSummary;
   const ModuleSummaryIndex *ImportSummary;
   bool UseCommandLine = false;
+  bool DevirtSpeculatively = false;
   WholeProgramDevirtPass()
       : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {}
   WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
-                         const ModuleSummaryIndex *ImportSummary)
-      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+                         const ModuleSummaryIndex *ImportSummary,
+                         bool DevirtSpeculatively = false)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary),
+        DevirtSpeculatively(DevirtSpeculatively) {
     assert(!(ExportSummary && ImportSummary));
   }
   LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &);

diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -305,6 +305,13 @@ static cl::opt<std::string> InstrumentColdFuncOnlyPath(
              "with --pgo-instrument-cold-function-only)"),
     cl::Hidden);
 
+// TODO: There is a similar flag in WPD pass, we should consolidate them by
+// parsing the option only once in PassBuilder and share it across both places.
+static cl::opt<bool> EnableDevirtualizeSpeculatively(
+    "enable-devirtualize-speculatively",
+    cl::desc("Enable speculative devirtualization optimization"),
+    cl::init(false));
+
 extern cl::opt<std::string> UseCtxProfile;
 extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
 
@@ -326,6 +333,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   MergeFunctions = EnableMergeFunctions;
   InlinerThreshold = -1;
   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
+  DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively;
 }
 
 namespace llvm {
@@ -1655,6 +1663,34 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (!LTOPreLink)
     MPM.addPass(RelLookupTableConverterPass());
 
+  // Add devirtualization pass only when LTO is not enabled, as otherwise
+  // the pass is already enabled in the LTO pipeline.
+  if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
+    // TODO: explore a better pipeline configuration that can improve
+    // compilation time overhead.
+    MPM.addPass(WholeProgramDevirtPass(
+        /*ExportSummary*/ nullptr,
+        /*ImportSummary*/ nullptr,
+        /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
+    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
+                                   lowertypetests::DropTestKind::Assume));
+    // Given that the devirtualization creates more opportunities for inlining,
+    // we run the Inliner again here to maximize the optimization gain we
+    // get from devirtualization.
+    // Also, we can't run devirtualization before inlining because the
+    // devirtualization depends on the passes optimizing/eliminating vtable GVs
+    // and those passes are only effective after inlining.
+    if (EnableModuleInliner) {
+      MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
+                                    UseInlineAdvisor,
+                                    ThinOrFullLTOPhase::None));
+    } else {
+      MPM.addPass(ModuleInlinerWrapperPass(
+          getInlineParamsFromOptLevel(Level),
+          /* MandatoryFirst */ true,
+          InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner}));
+    }
+  }
   return MPM;
 }