|
29 | 29 | #include "AMDGPUTargetObjectFile.h" |
30 | 30 | #include "AMDGPUTargetTransformInfo.h" |
31 | 31 | #include "AMDGPUUnifyDivergentExitNodes.h" |
32 | | -#include "AMDGPUVectorIdiom.h" |
33 | 32 | #include "AMDGPUWaitSGPRHazards.h" |
34 | 33 | #include "GCNDPPCombine.h" |
35 | 34 | #include "GCNIterativeScheduler.h" |
@@ -850,12 +849,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { |
850 | 849 | EnablePromoteKernelArguments) |
851 | 850 | FPM.addPass(AMDGPUPromoteKernelArgumentsPass()); |
852 | 851 |
|
853 | | - // Run vector-idiom canonicalization early (after inlining) and before |
854 | | - // infer-AS / SROA to maximize scalarization opportunities. |
855 | | - // Specify 32 bytes since the largest HIP vector types are double4 or |
856 | | - // long4. |
857 | | - FPM.addPass(AMDGPUVectorIdiomCombinePass(/*MaxBytes=*/32)); |
858 | | - |
859 | 852 | // Add infer address spaces pass to the opt pipeline after inlining |
860 | 853 | // but before SROA to increase SROA opportunities. |
861 | 854 | FPM.addPass(InferAddressSpacesPass()); |
@@ -918,8 +911,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { |
918 | 911 | if (EnableLowerModuleLDS) |
919 | 912 | PM.addPass(AMDGPULowerModuleLDSPass(*this)); |
920 | 913 | if (Level != OptimizationLevel::O0) { |
921 | | - PM.addPass(createModuleToFunctionPassAdaptor( |
922 | | - AMDGPUVectorIdiomCombinePass(/*MaxBytes=*/32))); |
923 | 914 | // Do we really need internalization in LTO? |
924 | 915 | if (InternalizeSymbols) { |
925 | 916 | PM.addPass(InternalizePass(mustPreserveGV)); |
|
0 commit comments