@@ -1770,8 +1770,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
17701770}
17711771
17721772void AMDGPUCodeGenPassBuilder::addCodeGenPrepare (AddIRPass &addPass) const {
1773+ // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
1774+ // deleted soon.
1775+
1776+ if (EnableLowerKernelArguments)
1777+ addPass (AMDGPULowerKernelArgumentsPass (TM));
1778+
1779+ // This lowering has been placed after codegenprepare to take advantage of
1780+ // address mode matching (which is why it isn't put with the LDS lowerings).
1781+ // It could be placed anywhere before uniformity annotations (an analysis
1782+ // that it changes by splitting up fat pointers into their components)
1783+ // but has been put before switch lowering and CFG flattening so that those
1784+ // passes can run on the more optimized control flow this pass creates in
1785+ // many cases.
1786+ //
1787+ // FIXME: This should ideally be put after the LoadStoreVectorizer.
1788+ // However, due to some annoying facts about ResourceUsageAnalysis,
1789+ // (especially as exercised in the resource-usage-dead-function test),
1790+ // we need all the function passes codegenprepare all the way through
1791+ // said resource usage analysis to run on the call graph produced
1792+ // before codegenprepare runs (because codegenprepare will knock some
1793+ // nodes out of the graph, which leads to function-level passes not
1794+ // being run on them, which causes crashes in the resource usage analysis).
1795+ addPass (AMDGPULowerBufferFatPointersPass (TM));
1796+
17731797 Base::addCodeGenPrepare (addPass);
17741798
1799+ if (isPassEnabled (EnableLoadStoreVectorizer))
1800+ addPass (LoadStoreVectorizerPass ());
1801+
17751802 // LowerSwitch pass may introduce unreachable blocks that can cause unexpected
17761803 // behavior for subsequent passes. Placing it here seems better that these
17771804 // blocks would get cleaned up by UnreachableBlockElim inserted next in the
@@ -1839,3 +1866,12 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
18391866 addPass (SILowerI1CopiesPass ());
18401867 return Error::success ();
18411868}
1869+
1870+ bool AMDGPUCodeGenPassBuilder::isPassEnabled (const cl::opt<bool > &Opt,
1871+ CodeGenOptLevel Level) const {
1872+ if (Opt.getNumOccurrences ())
1873+ return Opt;
1874+ if (TM.getOptLevel () < Level)
1875+ return false ;
1876+ return Opt;
1877+ }
0 commit comments