diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6838e926f4c9d..6cd3030a4c8c7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -343,6 +343,7 @@ Modified Compiler Flags ----------------------- - The `-gkey-instructions` compiler flag is now enabled by default when DWARF is emitted for plain C/C++ and optimizations are enabled. (#GH149509) - The `-fconstexpr-steps` compiler flag now accepts value `0` to opt out of this limit. (#GH160440) +- The `-fdevirtualize-speculatively` compiler flag is now supported to enable speculative devirtualization of virtual function calls. It is disabled by default. (#GH159685) Removed Compiler Flags ------------------------- diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 42665da413660..c624efb26f67d 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2352,6 +2352,56 @@ are listed below. pure ThinLTO, as all split regular LTO modules are merged and LTO linked with regular LTO. +.. option:: -fdevirtualize-speculatively + + Enable speculative devirtualization optimization where a virtual call + can be transformed into a direct call under the assumption that its + object is of a particular type. A runtime check is inserted to validate + the assumption before making the direct call, and if the check fails, + the original virtual call is made instead. This optimization can enable + more inlining opportunities and better optimization of the direct call. + This is different from whole program devirtualization optimization + that relies on global analysis and hidden visibility of the objects to prove + that the object is always of a particular type at a virtual call site. + This optimization doesn't require global analysis or hidden visibility. + This optimization doesn't devirtualize all virtual calls, but only + when there's a single implementation of the virtual function in the module. 
+ There could be a single implementation of the virtual function + either because the function is not overridden in any derived class, + or because all objects are instances of the same class/type. + + Example of IR before the optimization: + + .. code-block:: llvm + + %vtable = load ptr, ptr %BV, align 8, !tbaa !6 + %0 = tail call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS4Base") + tail call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable, align 8 + tail call void %1(ptr noundef nonnull align 8 dereferenceable(8) %BV) + ret void + + IR after the optimization: + + .. code-block:: llvm + + %vtable = load ptr, ptr %BV, align 8, !tbaa !12 + %0 = load ptr, ptr %vtable, align 8 + %1 = icmp eq ptr %0, @_ZN4Base17virtual_function1Ev + br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect, !prof !15 + if.true.direct_targ: ; preds = %entry + tail call void @_ZN4Base17virtual_function1Ev(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + if.false.orig_indirect: ; preds = %entry + tail call void %0(ptr noundef nonnull align 8 dereferenceable(8) %BV) + br label %if.end.icp + if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ret void + + This feature is temporarily ignored at the LLVM side when LTO is enabled. + TODO: Update the comment when the LLVM side supports this feature for LTO. + This feature is turned off by default. + .. option:: -f[no-]unique-source-file-names When enabled, allows the compiler to assume that each object file @@ -5216,6 +5266,8 @@ Execute ``clang-cl /?`` to see a list of supported options: -fstandalone-debug Emit full debug info for all types used by the program -fstrict-aliasing Enable optimizations based on strict aliasing rules -fsyntax-only Run the preprocessor, parser and semantic analysis stages + -fdevirtualize-speculatively + Enables speculative devirtualization optimization. -fwhole-program-vtables Enables whole-program vtable optimization. 
Requires -flto -gcodeview-ghash Emit type record hashes in a .debug$H section -gcodeview Generate CodeView debug information diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 76a6463881c6f..a059803c433e3 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -364,6 +364,8 @@ VALUE_CODEGENOPT(WarnStackSize , 32, UINT_MAX, Benign) ///< Set via -fwarn-s CODEGENOPT(NoStackArgProbe, 1, 0, Benign) ///< Set when -mno-stack-arg-probe is used CODEGENOPT(EmitLLVMUseLists, 1, 0, Benign) ///< Control whether to serialize use-lists. +CODEGENOPT(DevirtualizeSpeculatively, 1, 0, Benign) ///< Whether to apply the speculative + /// devirtualization optimization. CODEGENOPT(WholeProgramVTables, 1, 0, Benign) ///< Whether to apply whole-program /// vtable optimization. diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index d31bd7d6be322..df238a8087a46 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4512,6 +4512,13 @@ defm new_infallible : BoolFOption<"new-infallible", BothFlags<[], [ClangOption, CC1Option], " treating throwing global C++ operator new as always returning valid memory " "(annotates with __attribute__((returns_nonnull)) and throw()). 
This is detectable in source.">>; +defm devirtualize_speculatively + : BoolFOption<"devirtualize-speculatively", + CodeGenOpts<"DevirtualizeSpeculatively">, DefaultFalse, + PosFlag, + NegFlag, + BothFlags<[], [ClangOption, CLOption, CC1Option]>>; defm whole_program_vtables : BoolFOption<"whole-program-vtables", CodeGenOpts<"WholeProgramVTables">, DefaultFalse, PosFlag; defm web : BooleanFFlag<"web">, Group; defm whole_program : BooleanFFlag<"whole-program">, Group; -defm devirtualize : BooleanFFlag<"devirtualize">, Group; -defm devirtualize_speculatively : BooleanFFlag<"devirtualize-speculatively">, - Group; +defm devirtualize : BooleanFFlag<"devirtualize">, + Group; // Generic gfortran options. def A_DASH : Joined<["-"], "A-">, Group; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index ec609db8d3a3c..df716e5bce23f 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -940,6 +940,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // non-integrated assemblers don't recognize .cgprofile section. PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; + PTO.DevirtualizeSpeculatively = CodeGenOpts.DevirtualizeSpeculatively; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index f782b0cd17da4..96fde10f24f32 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2827,10 +2827,15 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, SourceLocation Loc) { if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); - else if (CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type test assumes if we are forcing public - // visibility. 
- !CGM.AlwaysHasLTOVisibilityPublic(RD)) { + // Emit the intrinsics of (type_test and assume) for the features of WPD and + // speculative devirtualization. For WPD, emit the intrinsics only for the + // case of non_public LTO visibility. + // TODO: refactor this condition and similar ones into a function (e.g., + // ShouldEmitDevirtualizationMD) to encapsulate the details of the different + // types of devirtualization. + else if ((CGM.getCodeGenOpts().WholeProgramVTables && + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CanQualType Ty = CGM.getContext().getCanonicalTagType(RD); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(Ty); llvm::Value *TypeId = @@ -2988,8 +2993,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, } bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { - if (!CGM.getCodeGenOpts().WholeProgramVTables || - !CGM.HasHiddenLTOVisibility(RD)) + if ((!CGM.getCodeGenOpts().WholeProgramVTables || + !CGM.HasHiddenLTOVisibility(RD)) && + !CGM.getCodeGenOpts().DevirtualizeSpeculatively) return false; if (CGM.getCodeGenOpts().VirtualFunctionElimination) diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 3fbac308a9178..91550d0d31d83 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -1363,10 +1363,12 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel( void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { - // Emit type metadata on vtables with LTO or IR instrumentation. + // Emit type metadata on vtables with LTO or IR instrumentation or + // speculative devirtualization. // In IR instrumentation, the type metadata is used to find out vtable // definitions (for type profiling) among all global variables. 
- if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr()) + if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr() && + !getCodeGenOpts().DevirtualizeSpeculatively) return; CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType()); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 65c47633bc5c4..24ff12c18d69a 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -716,10 +716,14 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); + // TODO: Update this name not to be restricted to WPD only + // as we now emit the vtable info for speculative devirtualization as + // well. bool ShouldEmitWPDInfo = - CGM.getCodeGenOpts().WholeProgramVTables && - // Don't insert type tests if we are forcing public visibility. - !CGM.AlwaysHasLTOVisibilityPublic(RD); + (CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type tests if we are forcing public visibility. + !CGM.AlwaysHasLTOVisibilityPublic(RD)) || + CGM.getCodeGenOpts().DevirtualizeSpeculatively; llvm::Value *VirtualFn = nullptr; { @@ -2110,17 +2114,20 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, // Always emit type metadata on non-available_externally definitions, and on // available_externally definitions if we are performing whole program - // devirtualization. For WPD we need the type metadata on all vtable - // definitions to ensure we associate derived classes with base classes - // defined in headers but with a strong definition only in a shared library. + // devirtualization or speculative devirtualization. We need the type metadata + // on all vtable definitions to ensure we associate derived classes with base + // classes defined in headers but with a strong definition only in a shared + // library. 
if (!VTable->isDeclarationForLinker() || - CGM.getCodeGenOpts().WholeProgramVTables) { + CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively) { CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); // For available_externally definitions, add the vtable to // @llvm.compiler.used so that it isn't deleted before whole program // analysis. if (VTable->isDeclarationForLinker()) { - assert(CGM.getCodeGenOpts().WholeProgramVTables); + assert(CGM.getCodeGenOpts().WholeProgramVTables || + CGM.getCodeGenOpts().DevirtualizeSpeculatively); CGM.addCompilerUsedGlobal(VTable); } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 0380568412e62..7187d1a158e01 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7745,6 +7745,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, addOpenMPHostOffloadingArgs(C, JA, Args, CmdArgs); + if (Args.hasFlag(options::OPT_fdevirtualize_speculatively, + options::OPT_fno_devirtualize_speculatively, + /*Default value*/ false)) + CmdArgs.push_back("-fdevirtualize-speculatively"); + bool VirtualFunctionElimination = Args.hasFlag(options::OPT_fvirtual_function_elimination, options::OPT_fno_virtual_function_elimination, false); diff --git a/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp new file mode 100644 index 0000000000000..20d2ab9f46fe5 --- /dev/null +++ b/clang/test/CodeGenCXX/speculative-devirt-metadata.cpp @@ -0,0 +1,78 @@ +// Test that Clang emits vtable metadata when speculative devirtualization is enabled. 
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -fdevirtualize-speculatively -emit-llvm -o - %s | FileCheck --check-prefix=CHECK %s + +struct A { + A(); + virtual void f(); +}; + +struct B : virtual A { + B(); + virtual void g(); + virtual void h(); +}; + +namespace { + +struct D : B { + D(); + virtual void f(); + virtual void h(); +}; + +} + +A::A() {} +B::B() {} +D::D() {} + +void A::f() { +} + +void B::g() { +} + +void D::f() { +} + +void D::h() { +} + +void af(A *a) { + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1A") + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + a->f(); +} + +void dg1(D *d) { + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"_ZTS1B") + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + d->g(); +} + +void df1(D *d) { + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + d->f(); +} + +void dh1(D *d) { + // CHECK: [[P:%[^ ]*]] = call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !11) + // CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + d->h(); +} + + +D d; + +void foo() { + dg1(&d); + df1(&d); + dh1(&d); + + + struct FA : A { + void f() {} + } fa; + af(&fa); +} diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c index 765f9d6ae3212..e5a23270ea732 100644 --- a/clang/test/Driver/clang_f_opts.c +++ b/clang/test/Driver/clang_f_opts.c @@ -377,7 +377,6 @@ // RUN: -ftree-ter \ // RUN: -ftree-vrp \ // RUN: -fno-devirtualize \ -// RUN: -fno-devirtualize-speculatively \ // RUN: -fslp-vectorize-aggressive \ // RUN: -fno-slp-vectorize-aggressive \ // RUN: %s 2>&1 | FileCheck --check-prefix=CHECK-WARNING %s @@ -436,7 +435,6 @@ // CHECK-WARNING-DAG: optimization flag '-ftree-ter' is not supported // CHECK-WARNING-DAG: optimization flag '-ftree-vrp' is not supported // CHECK-WARNING-DAG: optimization flag '-fno-devirtualize' is not supported -// CHECK-WARNING-DAG: 
optimization flag '-fno-devirtualize-speculatively' is not supported // CHECK-WARNING-DAG: the flag '-fslp-vectorize-aggressive' has been deprecated and will be ignored // CHECK-WARNING-DAG: the flag '-fno-slp-vectorize-aggressive' has been deprecated and will be ignored diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 8fa21f2cb2dd6..00d4874d5109b 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -99,6 +99,10 @@ class PipelineTuningOptions { // analyses after various module->function or cgscc->function adaptors in the // default pipelines. bool EagerlyInvalidateAnalyses; + + // Tuning option to enable/disable speculative devirtualization. + // Its default value is false. + bool DevirtualizeSpeculatively; }; /// This class provides access to building LLVM's passes. diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 7a03405b4f462..2e33a4098be1b 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -226,11 +226,14 @@ struct WholeProgramDevirtPass : public PassInfoMixin { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; bool UseCommandLine = false; + bool DevirtSpeculatively = false; WholeProgramDevirtPass() : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {} WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ExportSummary(ExportSummary), ImportSummary(ImportSummary) { + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively = false) + : ExportSummary(ExportSummary), ImportSummary(ImportSummary), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); } LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp index c6beb3fdf09bd..4de527d9ef85e 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -305,6 +305,13 @@ static cl::opt InstrumentColdFuncOnlyPath( "with --pgo-instrument-cold-function-only)"), cl::Hidden); +// TODO: There is a similar flag in WPD pass, we should consolidate them by +// parsing the option only once in PassBuilder and share it across both places. +static cl::opt EnableDevirtualizeSpeculatively( + "enable-devirtualize-speculatively", + cl::desc("Enable speculative devirtualization optimization"), + cl::init(false)); + extern cl::opt UseCtxProfile; extern cl::opt PGOInstrumentColdFunctionOnly; @@ -326,6 +333,7 @@ PipelineTuningOptions::PipelineTuningOptions() { MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; + DevirtualizeSpeculatively = EnableDevirtualizeSpeculatively; } namespace llvm { @@ -1655,6 +1663,34 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + // Add devirtualization pass only when LTO is not enabled, as otherwise + // the pass is already enabled in the LTO pipeline. + if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) { + // TODO: explore a better pipeline configuration that can improve + // compilation time overhead. + MPM.addPass(WholeProgramDevirtPass( + /*ExportSummary*/ nullptr, + /*ImportSummary*/ nullptr, + /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively)); + MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, + lowertypetests::DropTestKind::Assume)); + // Given that the devirtualization creates more opportunities for inlining, + // we run the Inliner again here to maximize the optimization gain we + // get from devirtualization. 
+ // Also, we can't run devirtualization before inlining because the + // devirtualization depends on the passes optimizing/eliminating vtable GVs + // and those passes are only effective after inlining. + if (EnableModuleInliner) { + MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level), + UseInlineAdvisor, + ThinOrFullLTOPhase::None)); + } else { + MPM.addPass(ModuleInlinerWrapperPass( + getInlineParamsFromOptLevel(Level), + /* MandatoryFirst */ true, + InlineContext{ThinOrFullLTOPhase::None, InlinePass::CGSCCInliner})); + } + } return MPM; } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 4642da0abdc13..7aa90eefd0d96 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -628,9 +628,11 @@ struct DevirtModule { std::map NumUnsafeUsesForTypeTest; PatternList FunctionsToSkip; + const bool DevirtSpeculatively; DevirtModule(Module &M, ModuleAnalysisManager &MAM, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) + const ModuleSummaryIndex *ImportSummary, + bool DevirtSpeculatively) : M(M), MAM(MAM), FAM(MAM.getResult(M).getManager()), ExportSummary(ExportSummary), ImportSummary(ImportSummary), @@ -643,7 +645,8 @@ struct DevirtModule { RemarksEnabled(areRemarksEnabled()), OREGetter([&](Function &F) -> OptimizationRemarkEmitter & { return FAM.getResult(F); - }) { + }), + DevirtSpeculatively(DevirtSpeculatively) { assert(!(ExportSummary && ImportSummary)); FunctionsToSkip.init(SkipFunctionNames); } @@ -757,7 +760,8 @@ struct DevirtModule { // Lower the module using the action and summary passed as command line // arguments. For testing purposes only. 
- static bool runForTesting(Module &M, ModuleAnalysisManager &MAM); + static bool runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively); }; struct DevirtIndex { @@ -800,11 +804,22 @@ struct DevirtIndex { PreservedAnalyses WholeProgramDevirtPass::run(Module &M, ModuleAnalysisManager &MAM) { if (UseCommandLine) { - if (!DevirtModule::runForTesting(M, MAM)) + if (!DevirtModule::runForTesting(M, MAM, ClDevirtualizeSpeculatively)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } - if (!DevirtModule(M, MAM, ExportSummary, ImportSummary).run()) + + std::optional Index; + if (!ExportSummary && !ImportSummary && DevirtSpeculatively) { + // Build the ExportSummary from the module. + assert(!ExportSummary && + "ExportSummary is expected to be empty in non-LTO mode"); + ProfileSummaryInfo PSI(M); + Index.emplace(buildModuleSummaryIndex(M, nullptr, &PSI)); + ExportSummary = Index.has_value() ? &Index.value() : nullptr; + } + if (!DevirtModule(M, MAM, ExportSummary, ImportSummary, DevirtSpeculatively) + .run()) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } @@ -1002,7 +1017,8 @@ static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) { return ErrorSuccess(); } -bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { +bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM, + bool DevirtSpeculatively) { std::unique_ptr Summary = std::make_unique(/*HaveGVs=*/false); @@ -1031,7 +1047,8 @@ bool DevirtModule::runForTesting(Module &M, ModuleAnalysisManager &MAM) { ClSummaryAction == PassSummaryAction::Export ? Summary.get() : nullptr, ClSummaryAction == PassSummaryAction::Import ? 
Summary.get() - : nullptr) + : nullptr, + DevirtSpeculatively) .run(); if (!ClWriteSummary.empty()) { @@ -1095,10 +1112,10 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // Without ClDevirtualizeSpeculatively, we cannot perform whole program + // Without DevirtSpeculatively, we cannot perform whole program // devirtualization analysis on a vtable with public LTO visibility. - if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + if (!DevirtSpeculatively && TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Function *Fn = nullptr; @@ -1119,7 +1136,7 @@ bool DevirtModule::tryFindVirtualCallTargets( // In most cases empty functions will be overridden by the // implementation of the derived class, so we can skip them. - if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && + if (DevirtSpeculatively && Fn->getReturnType()->isVoidTy() && Fn->getInstructionCount() <= 1) continue; @@ -1240,8 +1257,7 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, // add support to compare the virtual function pointer to the // devirtualized target. In case of a mismatch, fall back to indirect // call. - if (DevirtCheckMode == WPDCheckMode::Fallback || - ClDevirtualizeSpeculatively) { + if (DevirtCheckMode == WPDCheckMode::Fallback || DevirtSpeculatively) { MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights(); // Version the indirect call site. If the called value is equal to the // given callee, 'NewInst' will be executed, otherwise the original call @@ -2365,7 +2381,7 @@ bool DevirtModule::run() { Function *PublicTypeTestFunc = nullptr; // If we are in speculative devirtualization mode, we can work on the public // type test intrinsics. 
- if (ClDevirtualizeSpeculatively) + if (DevirtSpeculatively) PublicTypeTestFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); Function *TypeTestFunc = @@ -2501,7 +2517,7 @@ bool DevirtModule::run() { // Out of speculative devirtualization mode, Try to apply virtual constant // propagation or branch funneling. // TODO: This should eventually be enabled for non-public type tests. - if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { + if (!SingleImplDevirt && !DevirtSpeculatively) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); diff --git a/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll b/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll new file mode 100644 index 0000000000000..98df729696de9 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/speculative-devirt-then-inliner.ll @@ -0,0 +1,60 @@ +; RUN: opt -S -O3 -enable-devirtualize-speculatively %s 2>&1 | FileCheck %s + +; Test that the devirtualized calls are inlined. 
+ +@vt1 = constant [1 x ptr] [ptr @vf], !type !0 +@vt2 = constant [1 x ptr] [ptr @vf2], !type !1 + + +define i1 @vf(ptr %this) { + ret i1 true +} + +define i1 @vf2(ptr %this) { + ret i1 false +} + +; CHECK: define i1 @call +define i1 @call(ptr %obj) #1 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + ; if.true.direct_targ: ; preds = %0 + ; br label %if.end.icp + ; if.false.orig_indirect: ; preds = %0 + ; %res = tail call i1 %fptr(ptr nonnull %obj) + ; br label %if.end.icp + ; if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ; %2 = phi i1 [ %res, %if.false.orig_indirect ], [ true, %if.true.direct_targ ] + ; ret i1 %2 + %res = call i1 %fptr(ptr %obj) + ret i1 %res +} + + +; CHECK: define i1 @call1 +define i1 @call1(ptr %obj) #1 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + ; if.true.direct_targ: ; preds = %0 + ; br label %if.end.icp + ; if.false.orig_indirect: ; preds = %0 + ; %res = tail call i1 %fptr(ptr nonnull %obj) + ; br label %if.end.icp + ; if.end.icp: ; preds = %if.false.orig_indirect, %if.true.direct_targ + ; %2 = phi i1 [ %res, %if.false.orig_indirect ], [ false, %if.true.direct_targ ] + ; ret i1 %2 + %res = call i1 %fptr(ptr %obj) + ret i1 %res +} + + +declare i1 @llvm.type.test(ptr, metadata) +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid1"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll new file mode 100644 index 0000000000000..d8781d5686b53 --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-metadata.ll @@ -0,0 +1,64 @@ +; Test that the needed intrinsics for devirtualization are preserved and not dropped 
by other +; optimizations. + +; RUN: opt -S -O3 %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +@vt1 = constant [1 x ptr] [ptr @vf], !type !8 +@vt2 = constant [1 x ptr] [ptr @vf2], !type !12 + +define i1 @vf(ptr %this) #0 !dbg !7 { + ret i1 true +} + +define i1 @vf2(ptr %this) !dbg !11 { + ret i1 false +} + +define void @call(ptr %obj) #1 !dbg !5 { + %vtable = load ptr, ptr %obj + ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.public.type.test(ptr [[VT:%[^ ]*]], metadata !"typeid") + ; CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + %p = call i1 @llvm.public.type.test(ptr %vtable, metadata !"typeid") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable + call i1 %fptr(ptr %obj), !dbg !6 + ret void +} + +define void @call1(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + ; CHECK: [[P:%[^ ]*]] = tail call i1 @llvm.type.test(ptr [[VT:%[^ ]*]], metadata !"typeid1") + ; CHECK-NEXT: call void @llvm.assume(i1 [[P]]) + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr %vtable, align 8 + %1 = call i1 %fptr(ptr %obj), !dbg !10 + ret void +} + +declare i1 @llvm.type.test(ptr, metadata) +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 4.0.0 (trunk 278098)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "devirt-single.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 4.0.0 (trunk 278098)"} +!5 = distinct !DISubprogram(name: "call", linkageName: "_Z4callPv", scope: !1, file: !1, line: 29, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!6 = !DILocation(line: 30, 
column: 32, scope: !5) +!7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEb", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!8 = !{i32 0, !"typeid"} + +!9 = distinct !DISubprogram(name: "call1", linkageName: "_Z5call1Pv", scope: !1, file: !1, line: 31, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 35, column: 32, scope: !9) +!11 = distinct !DISubprogram(name: "vf2", linkageName: "_ZN3vt13vf2Eb", scope: !1, file: !1, line: 23, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!12 = !{i32 0, !"typeid1"} +