diff --git a/llvm/include/llvm/CodeGen/PostRAMachineSink.h b/llvm/include/llvm/CodeGen/PostRAMachineSink.h new file mode 100644 index 0000000000000..2f1f79a0480df --- /dev/null +++ b/llvm/include/llvm/CodeGen/PostRAMachineSink.h @@ -0,0 +1,30 @@ +//===- llvm/CodeGen/PostRAMachineSink.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_POSTRAMACHINESINK_H +#define LLVM_CODEGEN_POSTRAMACHINESINK_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class PostRAMachineSinkingPass + : public PassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() const { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_POSTRAMACHINESINK_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index e57f062a78a96..c847716647825 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -252,7 +252,7 @@ LLVM_ABI void initializePostDominatorTreeWrapperPassPass(PassRegistry &); LLVM_ABI void initializePostInlineEntryExitInstrumenterPass(PassRegistry &); LLVM_ABI void initializePostMachineSchedulerLegacyPass(PassRegistry &); LLVM_ABI void initializePostRAHazardRecognizerLegacyPass(PassRegistry &); -LLVM_ABI void initializePostRAMachineSinkingPass(PassRegistry &); +LLVM_ABI void initializePostRAMachineSinkingLegacyPass(PassRegistry &); LLVM_ABI void initializePostRASchedulerLegacyPass(PassRegistry &); LLVM_ABI void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &); LLVM_ABI void initializePrintFunctionPassWrapperPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 734372c1af75b..910e25a048815 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -69,6 +69,7 @@ #include "llvm/CodeGen/PHIElimination.h" #include "llvm/CodeGen/PatchableFunction.h" #include "llvm/CodeGen/PeepholeOptimizer.h" +#include "llvm/CodeGen/PostRAMachineSink.h" #include "llvm/CodeGen/PostRASchedulerList.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index e03038921af99..b6c1460313f86 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -164,6 +164,7 @@ MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass()) MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass()) MACHINE_FUNCTION_PASS("post-RA-hazard-rec", PostRAHazardRecognizerPass()) MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM)) +MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass()) MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass(TM)) MACHINE_FUNCTION_PASS("post-ra-pseudos", ExpandPostRAPseudosPass()) MACHINE_FUNCTION_PASS("print", PrintMIRPass()) @@ -317,7 +318,6 @@ DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass) DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass) -DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass) DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass) DUMMY_MACHINE_FUNCTION_PASS("prologepilog-code", PrologEpilogCodeInserterPass) DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index aa3591cb6be58..b9b394909bb4c 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -106,8 +106,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePatchableFunctionLegacyPass(Registry); initializePeepholeOptimizerLegacyPass(Registry); initializePostMachineSchedulerLegacyPass(Registry); + initializePostRAMachineSinkingLegacyPass(Registry); initializePostRAHazardRecognizerLegacyPass(Registry); - initializePostRAMachineSinkingPass(Registry); initializePostRASchedulerLegacyPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 8411d5c4b09c8..a0e067a6323b7 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/PostRAMachineSink.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -2068,24 +2069,7 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy( //===----------------------------------------------------------------------===// namespace { -class PostRAMachineSinking : public MachineFunctionPass { -public: - bool runOnMachineFunction(MachineFunction &MF) override; - - static char ID; - PostRAMachineSinking() : MachineFunctionPass(ID) {} - StringRef getPassName() const override { return "PostRA Machine Sink"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().setNoVRegs(); - } - -private: +class PostRAMachineSinkingImpl { /// Track which register units have been modified and used. LiveRegUnits ModifiedRegUnits, UsedRegUnits; @@ -2099,13 +2083,36 @@ class PostRAMachineSinking : public MachineFunctionPass { /// successors. bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII); + +public: + bool run(MachineFunction &MF); }; + +class PostRAMachineSinkingLegacy : public MachineFunctionPass { +public: + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + PostRAMachineSinkingLegacy() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { return "PostRA Machine Sink"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } +}; + } // namespace -char PostRAMachineSinking::ID = 0; -char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID; +char PostRAMachineSinkingLegacy::ID = 0; +char &llvm::PostRAMachineSinkingID = PostRAMachineSinkingLegacy::ID; -INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink", +INITIALIZE_PASS(PostRAMachineSinkingLegacy, "postra-machine-sink", "PostRA Machine Sink", false, false) static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, Register Reg, @@ -2226,10 +2233,10 @@ static bool hasRegisterDependency(MachineInstr *MI, return HasRegDependency; } -bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, - MachineFunction &MF, - const TargetRegisterInfo *TRI, - const TargetInstrInfo *TII) { +bool PostRAMachineSinkingImpl::tryToSinkCopy(MachineBasicBlock &CurBB, + MachineFunction &MF, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII) { SmallPtrSet SinkableBBs; // FIXME: For now, we sink only to a successor which has a single predecessor // so that we can directly sink COPY instructions to the successor without @@ -2354,10 +2361,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, return Changed; } -bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; - +bool PostRAMachineSinkingImpl::run(MachineFunction &MF) { bool Changed = false; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); @@ -2369,3 +2373,23 @@ bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +bool PostRAMachineSinkingLegacy::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + return PostRAMachineSinkingImpl().run(MF); +} + +PreservedAnalyses +PostRAMachineSinkingPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + + if (!PostRAMachineSinkingImpl().run(MF)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet(); + return PA; +} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 0f2896ff2e48b..572e5f19a1972 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -139,6 +139,7 @@ #include "llvm/CodeGen/PatchableFunction.h" #include "llvm/CodeGen/PeepholeOptimizer.h" #include "llvm/CodeGen/PostRAHazardRecognizer.h" +#include "llvm/CodeGen/PostRAMachineSink.h" #include "llvm/CodeGen/PostRASchedulerList.h" #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/RegAllocEvictionAdvisor.h" diff --git a/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir b/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir index 15c1ccb0e609b..cf148f77b206b 100644 --- a/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir +++ b/llvm/test/CodeGen/AArch64/bisect-post-ra-machine-sink.mir @@ -1,5 +1,7 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck -check-prefix=RUN-POSTRA %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -passes='postra-machine-sink' -o - %s | FileCheck -check-prefix=RUN-POSTRA %s # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -opt-bisect-limit=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=BISECT-NO-RUN-POSTRA %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -passes='postra-machine-sink' -opt-bisect-limit=0 -o - %s | FileCheck -check-prefix=BISECT-NO-RUN-POSTRA %s --- diff --git a/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir b/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir index a0eb3c1979391..8394bc3d0702b 100644 --- a/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir +++ b/llvm/test/CodeGen/AArch64/post-ra-machine-sink.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -passes='postra-machine-sink' -o - %s | FileCheck %s --- # Sink w19 to %bb.1. diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 03edb982d94e0..500d15e081680 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -10,9 +10,9 @@ ; GCN-O0: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) -; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O2: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) -; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) +; GCN-O3: require,require,require,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require,si-opt-vgpr-liverange,require,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy,virt-reg-rewriter,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy,si-lower-wwm-copies,virt-reg-rewriter,amdgpu-reserve-wwm-regs,greedy,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate)) define void @empty() { ret void diff --git a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir index 86863c3175364..987c5a2baf66a 100644 --- a/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-machine-sink.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -passes='postra-machine-sink' -o - %s | FileCheck %s # Don't sink copy that writes sub-register of another copy source register # CHECK-LABEL: name: donotsinkcopy diff --git a/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir index 14617e066f954..8687d6ca4378f 100644 --- a/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-sink-update-dependency.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes='postra-machine-sink' -o - %s | FileCheck %s # # In the example, the ` $sgpr4 = COPY $sgpr2` was incorrectly sunk into bb.3. This happened because we did not update # register uses when we found that `$sgpr2 = COPY $sgpr3` should not be sunk because of conflict with the successor's diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir index ff87c28bb7ec5..d134f290aa5f5 100644 --- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir +++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=postra-machine-sink -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes='postra-machine-sink' -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefixes=GFX10 %s # Ensure that PostRA Machine Sink does not sink instructions # past block prologues which would overwrite their uses. diff --git a/llvm/test/CodeGen/SystemZ/no-postra-sink.mir b/llvm/test/CodeGen/SystemZ/no-postra-sink.mir index 0db431535c051..b358ca5c6e56b 100644 --- a/llvm/test/CodeGen/SystemZ/no-postra-sink.mir +++ b/llvm/test/CodeGen/SystemZ/no-postra-sink.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -passes='postra-machine-sink' -o - %s | FileCheck %s --- # Don't sink COPY to bb.2 since SLLK define r13l that is aliased with r12q. diff --git a/llvm/test/CodeGen/X86/postra-ignore-dbg-instrs.mir b/llvm/test/CodeGen/X86/postra-ignore-dbg-instrs.mir index 7e37415463f45..c7e2e28228029 100644 --- a/llvm/test/CodeGen/X86/postra-ignore-dbg-instrs.mir +++ b/llvm/test/CodeGen/X86/postra-ignore-dbg-instrs.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=x86_64-none-linux-gnu -run-pass=postra-machine-sink -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64-none-linux-gnu -passes=postra-machine-sink -o - %s | FileCheck %s # # This test was originally generated from the following sample: # diff --git a/llvm/test/CodeGen/X86/pr38952.mir b/llvm/test/CodeGen/X86/pr38952.mir index d67174aa4847c..1d18738cf4ad4 100644 --- a/llvm/test/CodeGen/X86/pr38952.mir +++ b/llvm/test/CodeGen/X86/pr38952.mir @@ -1,4 +1,5 @@ # RUN: llc %s -run-pass=postra-machine-sink -o - | FileCheck %s +# RUN: llc %s -passes='postra-machine-sink' -o - | FileCheck %s --- | ; Module stripped of everything, MIR below is what's interesting ; ModuleID = '' diff --git a/llvm/test/DebugInfo/MIR/X86/postra-subreg-sink.mir b/llvm/test/DebugInfo/MIR/X86/postra-subreg-sink.mir index 2307df9b392ba..b54f93edfc492 100644 --- a/llvm/test/DebugInfo/MIR/X86/postra-subreg-sink.mir +++ b/llvm/test/DebugInfo/MIR/X86/postra-subreg-sink.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=x86_64-unknown-unknown %s -run-pass=postra-machine-sink -o - | FileCheck %s +# RUN: llc -mtriple=x86_64-unknown-unknown %s -passes='postra-machine-sink' -o - | FileCheck %s # Test that when we run the postra machine sinker (which sinks COPYs), that # DBG_VALUEs of both sub and super-registers that depend on such COPYs are # sunk with them.