diff --git a/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h new file mode 100644 index 0000000000000..a3e1014b417e5 --- /dev/null +++ b/llvm/include/llvm/Analysis/RuntimeLibcallInfo.h @@ -0,0 +1,60 @@ +//===-- RuntimeLibcallInfo.h - Runtime library information ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_RUNTIMELIBCALLINFO_H +#define LLVM_ANALYSIS_RUNTIMELIBCALLINFO_H + +#include "llvm/IR/RuntimeLibcalls.h" +#include "llvm/Pass.h" + +namespace llvm { + +class LLVM_ABI RuntimeLibraryAnalysis + : public AnalysisInfoMixin { +public: + using Result = RTLIB::RuntimeLibcallsInfo; + + RuntimeLibraryAnalysis() = default; + RuntimeLibraryAnalysis(RTLIB::RuntimeLibcallsInfo &&BaselineInfoImpl) + : LibcallsInfo(std::move(BaselineInfoImpl)) {} + explicit RuntimeLibraryAnalysis(const Triple &T) : LibcallsInfo(T) {} + + LLVM_ABI RTLIB::RuntimeLibcallsInfo run(const Module &M, + ModuleAnalysisManager &); + +private: + friend AnalysisInfoMixin; + LLVM_ABI static AnalysisKey Key; + + RTLIB::RuntimeLibcallsInfo LibcallsInfo; +}; + +class LLVM_ABI RuntimeLibraryInfoWrapper : public ImmutablePass { + RuntimeLibraryAnalysis RTLA; + std::optional RTLCI; + +public: + static char ID; + RuntimeLibraryInfoWrapper(); + explicit RuntimeLibraryInfoWrapper(const Triple &T); + explicit RuntimeLibraryInfoWrapper(const RTLIB::RuntimeLibcallsInfo &RTLCI); + + const RTLIB::RuntimeLibcallsInfo &getRTLCI(const Module &M) { + ModuleAnalysisManager DummyMAM; + RTLCI = RTLA.run(M, DummyMAM); + return *RTLCI; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +LLVM_ABI ModulePass *createRuntimeLibraryInfoWrapperPass(); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 5241a51dd8cd8..d7921c3eb3f7c 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -46,6 +46,7 @@ class SelectionDAGISel { public: TargetMachine &TM; const TargetLibraryInfo *LibInfo; + const RTLIB::RuntimeLibcallsInfo *RuntimeLibCallInfo; std::unique_ptr FuncInfo; std::unique_ptr SwiftError; MachineFunction *MF; diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 78e4b1723aafa..c822b6530a441 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -9,6 +9,8 @@ // This file implements a common interface to work with library calls into a // runtime that may be emitted by a given backend. // +// FIXME: This should probably move to Analysis +// //===----------------------------------------------------------------------===// #ifndef LLVM_IR_RUNTIME_LIBCALLS_H @@ -20,6 +22,7 @@ #include "llvm/ADT/StringTable.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" @@ -74,6 +77,8 @@ struct RuntimeLibcallsInfo { public: friend class llvm::LibcallLoweringInfo; + RuntimeLibcallsInfo() = default; + explicit RuntimeLibcallsInfo( const Triple &TT, ExceptionHandling ExceptionModel = ExceptionHandling::None, @@ -89,6 +94,11 @@ struct RuntimeLibcallsInfo { initLibcalls(TT, ExceptionModel, FloatABI, EABIVersion, ABIName); } + explicit RuntimeLibcallsInfo(const Module &M); + + bool invalidate(Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &); + /// Get the libcall routine name for the specified libcall implementation. static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl) { if (CallImpl == RTLIB::Unsupported) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index c8196d8a7ef48..10a4d8525a9e8 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -290,6 +290,7 @@ LLVM_ABI void initializeRemoveRedundantDebugValuesLegacyPass(PassRegistry &); LLVM_ABI void initializeRenameIndependentSubregsLegacyPass(PassRegistry &); LLVM_ABI void initializeReplaceWithVeclibLegacyPass(PassRegistry &); LLVM_ABI void initializeResetMachineFunctionPass(PassRegistry &); +LLVM_ABI void initializeRuntimeLibraryInfoWrapperPass(PassRegistry &); LLVM_ABI void initializeSCEVAAWrapperPassPass(PassRegistry &); LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &); LLVM_ABI void initializeSafeStackLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 2f20792568e63..bd7cd39ebb743 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -20,6 +20,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" @@ -638,6 +639,8 @@ Error CodeGenPassBuilder::buildPipeline( /*Force=*/true); addIRPass(RequireAnalysisPass(), /*Force=*/true); + addIRPass(RequireAnalysisPass(), + /*Force=*/true); addISelPasses(addIRPass); } diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 9f5daf32be9a0..aaac2cf187281 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -63,6 +63,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeRegionPrinterPass(Registry); initializeRegionOnlyViewerPass(Registry); initializeRegionOnlyPrinterPass(Registry); + initializeRuntimeLibraryInfoWrapperPass(Registry); initializeSCEVAAWrapperPassPass(Registry); initializeScalarEvolutionWrapperPassPass(Registry); initializeStackSafetyGlobalInfoWrapperPassPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 16dd6f8b86006..88ebd65ec46af 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -137,6 +137,7 @@ add_llvm_component_library(LLVMAnalysis RegionPass.cpp RegionPrinter.cpp ReplayInlineAdvisor.cpp + RuntimeLibcallInfo.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp diff --git a/llvm/lib/Analysis/RuntimeLibcallInfo.cpp b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp new file mode 100644 index 0000000000000..6fb4119aa73f2 --- /dev/null +++ b/llvm/lib/Analysis/RuntimeLibcallInfo.cpp @@ -0,0 +1,43 @@ +//===- RuntimeLibcallInfo.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RuntimeLibcallInfo.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +AnalysisKey RuntimeLibraryAnalysis::Key; + +RTLIB::RuntimeLibcallsInfo +RuntimeLibraryAnalysis::run(const Module &M, ModuleAnalysisManager &) { + return RTLIB::RuntimeLibcallsInfo(M); +} + +INITIALIZE_PASS(RuntimeLibraryInfoWrapper, "runtime-library-info", + "Runtime Library Function Analysis", false, true) + +RuntimeLibraryInfoWrapper::RuntimeLibraryInfoWrapper() + : ImmutablePass(ID), RTLA(RTLIB::RuntimeLibcallsInfo(Triple())) {} + +char RuntimeLibraryInfoWrapper::ID = 0; + +ModulePass *llvm::createRuntimeLibraryInfoWrapperPass() { + return new RuntimeLibraryInfoWrapper(); +} + +void RuntimeLibraryInfoWrapper::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +// Assume this is stable unless explicitly invalidated. +bool RTLIB::RuntimeLibcallsInfo::invalidate( + Module &M, const PreservedAnalyses &PA, + ModuleAnalysisManager::Invalidator &) { + auto PAC = PA.getChecker(); + return !PAC.preservedWhenStateless(); +} diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index f44eb227133ae..9386ffe7791a3 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/SimplifyQuery.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -1092,6 +1093,8 @@ class ExpandFpLegacyPass : public FunctionPass { auto *TM = &getAnalysis().getTM(); auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); AssumptionCache *AC = nullptr; + const RTLIB::RuntimeLibcallsInfo *Libcalls = + &getAnalysis().getRTLCI(*F.getParent()); if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone()) AC = &getAnalysis().getAssumptionCache(F); @@ -1104,6 +1107,7 @@ class ExpandFpLegacyPass : public FunctionPass { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); } }; } // namespace @@ -1126,6 +1130,15 @@ PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) { AssumptionCache *AC = nullptr; if (OptLevel != CodeGenOptLevel::None) AC = &FAM.getResult(F); + + auto &MAMProxy = FAM.getResult(F); + const RTLIB::RuntimeLibcallsInfo *Libcalls = + MAMProxy.getCachedResult(*F.getParent()); + if (!Libcalls) { + F.getContext().emitError("'runtime-libcall-info' analysis required"); + return PreservedAnalyses::all(); + } + return runImpl(F, TLI, AC) ? PreservedAnalyses::none() : PreservedAnalyses::all(); } @@ -1133,6 +1146,7 @@ PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) { char ExpandFpLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp", "Expand certain fp instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(RuntimeLibraryInfoWrapper) INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false) FunctionPass *llvm::createExpandFpPass(CodeGenOptLevel OptLevel) { diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 2fb01a4f95fea..f4c5c6ff35af6 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -9,7 +9,7 @@ #include "llvm/IR/RuntimeLibcalls.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/StringTable.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/ARMTargetParser.h" @@ -25,6 +25,11 @@ using namespace RTLIB; #define DEFINE_GET_LOOKUP_LIBCALL_IMPL_NAME #include "llvm/IR/RuntimeLibcalls.inc" +RuntimeLibcallsInfo::RuntimeLibcallsInfo(const Module &M) + : RuntimeLibcallsInfo(M.getTargetTriple()) { + // TODO: Consider module flags +} + /// Set default libcall names. If a target wants to opt-out of a libcall it /// should be placed here. void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3c9a27ac24015..40ceb6f6ae28f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -67,6 +67,7 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RuntimeLibcallInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionDivision.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 1853cdd45d0ee..d870f99aad552 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -35,6 +35,7 @@ MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis()) MODULE_ANALYSIS("reg-usage", PhysicalRegisterUsageAnalysis()) +MODULE_ANALYSIS("runtime-libcall-info", RuntimeLibraryAnalysis()) MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis()) MODULE_ANALYSIS("verify", VerifierAnalysis()) diff --git a/llvm/lib/Target/Target.cpp b/llvm/lib/Target/Target.cpp index ec673ef4cda52..7387571418c8d 100644 --- a/llvm/lib/Target/Target.cpp +++ b/llvm/lib/Target/Target.cpp @@ -37,6 +37,7 @@ inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfoImpl *P) { void llvm::initializeTarget(PassRegistry &Registry) { initializeTargetLibraryInfoWrapperPassPass(Registry); + initializeRuntimeLibraryInfoWrapperPass(Registry); initializeTargetTransformInfoWrapperPassPass(Registry); } diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 704ea37117f32..8e7389ace9c5c 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -9,11 +9,11 @@ ; RUN: | FileCheck -check-prefix=GCN-O3 %s -; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O0: require,require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,unreachableblockelim,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) -; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O2: require,require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) -; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) +; GCN-O3: require,require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation,reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),free-machine-function)) define void @empty() { ret void diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll index f70f0d25f172d..4d302f63e1f0b 100644 --- a/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/frem-inf.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK %s -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -o - | FileCheck --check-prefixes CHECK,OPT1 %s ; Check the handling of potentially infinite numerators in the frem ; expansion at different optimization levels and with different diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll index 4c0f9db147c96..56ccfb6bf454c 100644 --- a/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/frem.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o - | FileCheck %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" %s -S -o - | FileCheck %s define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ; CHECK-LABEL: define amdgpu_kernel void @frem_f16( diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll new file mode 100644 index 0000000000000..5cad68e66d3ee --- /dev/null +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/missing-analysis.ll @@ -0,0 +1,6 @@ +; RUN: not opt -mtriple=amdgcn -passes=expand-fp -disable-output %s 2>&1 | FileCheck %s + +; CHECK: 'runtime-libcall-info' analysis required +define void @empty() { + ret void +} diff --git a/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll b/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll index 03cafd4ff1160..b3ee0a94ed348 100644 --- a/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll +++ b/llvm/test/Transforms/ExpandFp/AMDGPU/pass-parameters.ll @@ -1,7 +1,7 @@ -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null -; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" -disable-output %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" -disable-output %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" -disable-output %s +; RUN: opt -mtriple=amdgcn -passes="require,expand-fp" -disable-output %s ; RUN: not opt -mtriple=amdgcn -passes="expand-fp" %s -S -o /dev/null 2>&1 | FileCheck --check-prefix=TOO-LARGE %s ; TOO-LARGE: {{.*}}invalid optimization level for expand-fp pass: 4 diff --git a/llvm/test/tools/llc/new-pm/start-stop.ll b/llvm/test/tools/llc/new-pm/start-stop.ll index e4c454900fd38..0e68cdbe67b63 100644 --- a/llvm/test/tools/llc/new-pm/start-stop.ll +++ b/llvm/test/tools/llc/new-pm/start-stop.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -filetype=null %s | FileCheck --match-full-lines %s --check-prefix=NULL ; RUN: llc -mtriple=x86_64-pc-linux-gnu -enable-new-pm -print-pipeline-passes -start-before=mergeicmps -stop-after=gc-lowering -o /dev/null %s | FileCheck --match-full-lines %s --check-prefix=OBJ -; NULL: require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify) -; OBJ: require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify),PrintMIRPreparePass,function(machine-function(print),free-machine-function) +; NULL: require,require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify) +; OBJ: require,require,require,require,function(verify,mergeicmps,expand-memcmp,gc-lowering,verify),PrintMIRPreparePass,function(machine-function(print),free-machine-function)