2424#include < llvm/Passes/PassPlugin.h>
2525
2626// NewPM needs to manually include all the pass headers
27+ #include < llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h>
2728#include < llvm/Transforms/IPO/AlwaysInliner.h>
2829#include < llvm/Transforms/IPO/Annotation2Metadata.h>
2930#include < llvm/Transforms/IPO/ConstantMerge.h>
3031#include < llvm/Transforms/IPO/ForceFunctionAttrs.h>
3132#include < llvm/Transforms/IPO/GlobalDCE.h>
33+ #include < llvm/Transforms/IPO/GlobalOpt.h>
3234#include < llvm/Transforms/IPO/StripDeadPrototypes.h>
3335#include < llvm/Transforms/InstCombine/InstCombine.h>
3436#include < llvm/Transforms/Instrumentation/AddressSanitizer.h>
3537#include < llvm/Transforms/Instrumentation/MemorySanitizer.h>
3638#include < llvm/Transforms/Instrumentation/ThreadSanitizer.h>
3739#include < llvm/Transforms/Scalar/ADCE.h>
3840#include < llvm/Transforms/Scalar/AnnotationRemarks.h>
41+ #include < llvm/Transforms/Scalar/BDCE.h>
42+ #include " llvm/Transforms/Scalar/ConstraintElimination.h"
3943#include < llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
4044#include < llvm/Transforms/Scalar/DCE.h>
4145#include < llvm/Transforms/Scalar/DeadStoreElimination.h>
5963#include < llvm/Transforms/Scalar/LowerConstantIntrinsics.h>
6064#include < llvm/Transforms/Scalar/LowerExpectIntrinsic.h>
6165#include < llvm/Transforms/Scalar/MemCpyOptimizer.h>
66+ #include < llvm/Transforms/Scalar/MergedLoadStoreMotion.h>
6267#include < llvm/Transforms/Scalar/Reassociate.h>
6368#include < llvm/Transforms/Scalar/SCCP.h>
6469#include < llvm/Transforms/Scalar/SROA.h>
6570#include < llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
6671#include < llvm/Transforms/Scalar/SimplifyCFG.h>
6772#include < llvm/Transforms/Scalar/WarnMissedTransforms.h>
73+ #include < llvm/Transforms/Utils/LibCallsShrinkWrap.h>
6874#include < llvm/Transforms/Utils/InjectTLIMappings.h>
75+ #include < llvm/Transforms/Utils/Mem2Reg.h>
76+ #include < llvm/Transforms/Utils/RelLookupTableConverter.h>
6977#include < llvm/Transforms/Utils/ModuleUtils.h>
7078#include < llvm/Transforms/Utils/SimplifyCFGOptions.h>
7179#include < llvm/Transforms/Vectorize/LoopVectorize.h>
@@ -196,10 +204,9 @@ namespace {
196204 .convertSwitchRangeToICmp (true )
197205 .convertSwitchToLookupTable (true )
198206 .forwardSwitchCondToPhi (true )
199- // These mess with loop rotation, so only do them after that
207+ . needCanonicalLoops ( false )
200208 .hoistCommonInsts (true )
201- // Causes an SRET assertion error in late-gc-lowering
202- // .sinkCommonInsts(true)
209+ .sinkCommonInsts (true )
203210 ;
204211 }
205212
@@ -341,10 +348,16 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
341348 FPM.addPass (DCEPass ());
342349 FPM.addPass (SimplifyCFGPass (basicSimplifyCFGOptions ()));
343350 if (O.getSpeedupLevel () >= 1 ) {
344- // TODO check the LLVM 15 default.
345- FPM.addPass (SROAPass (SROAOptions::PreserveCFG ));
351+ FPM. addPass ( SROAPass (SROAOptions::ModifyCFG));
352+ FPM.addPass (EarlyCSEPass ( ));
346353 }
347354 MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
355+ if (O.getSpeedupLevel () >= 1 ) {
356+ FunctionPassManager GlobalFPM;
357+ MPM.addPass (GlobalOptPass ());
358+ GlobalFPM.addPass (PromotePass ());
359+ GlobalFPM.addPass (InstCombinePass ());
360+ }
348361 }
349362 invokeEarlySimplificationCallbacks (MPM, PB, O);
350363 }
@@ -379,22 +392,24 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
379392 if (O.getSpeedupLevel () >= 1 ) {
380393 FunctionPassManager FPM;
381394 if (O.getSpeedupLevel () >= 2 ) {
382- // TODO check the LLVM 15 default.
383- FPM.addPass (SROAPass (SROAOptions::PreserveCFG));
384- // SROA can duplicate PHI nodes which can block LowerSIMD
385- FPM.addPass (InstCombinePass ());
386- FPM.addPass (JumpThreadingPass ());
387- FPM.addPass (CorrelatedValuePropagationPass ());
388- FPM.addPass (ReassociatePass ());
389- FPM.addPass (EarlyCSEPass ());
390- JULIA_PASS (FPM.addPass (AllocOptPass ()));
391- } else { // if (O.getSpeedupLevel() >= 1) (exactly)
392- FPM.addPass (InstCombinePass ());
393- FPM.addPass (EarlyCSEPass ());
394- }
395- invokePeepholeEPCallbacks (FPM, PB, O);
396- MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
395+ FPM.addPass (SROAPass (SROAOptions::ModifyCFG));
396+ FPM.addPass (EarlyCSEPass (true ));
397+ FPM.addPass (InstCombinePass ());
398+ FPM.addPass (AggressiveInstCombinePass ());
399+ FPM.addPass (JumpThreadingPass ());
400+ FPM.addPass (CorrelatedValuePropagationPass ());
401+ FPM.addPass (LibCallsShrinkWrapPass ());
402+ FPM.addPass (ReassociatePass ());
403+ FPM.addPass (ConstraintEliminationPass ());
404+ JULIA_PASS (FPM.addPass (AllocOptPass ()));
405+ } else { // if (O.getSpeedupLevel() >= 1) (exactly)
406+ FPM.addPass (EarlyCSEPass ());
407+ FPM.addPass (InstCombinePass ());
408+ }
409+ invokePeepholeEPCallbacks (FPM, PB, O);
410+ MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM), /* UseMemorySSA = */ true ));
397411 }
412+ MPM.addPass (GlobalOptPass ());
398413 MPM.addPass (GlobalDCEPass ());
399414 }
400415 MPM.addPass (AfterEarlyOptimizationMarkerPass ());
@@ -407,41 +422,41 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB
407422 LoopPassManager LPM;
408423 LPM.addPass (LowerSIMDLoopPass ());
409424 if (O.getSpeedupLevel () >= 2 ) {
410- LPM.addPass (LoopRotatePass ());
425+ LPM.addPass (LoopInstSimplifyPass ());
426+ LPM.addPass (LoopSimplifyCFGPass ());
427+ LPM.addPass (BeforeLICMMarkerPass ());
428+ auto opts = LICMOptions ();
429+ opts.AllowSpeculation = false ;
430+ LPM.addPass (LICMPass (opts));
431+ LPM.addPass (JuliaLICMPass ());
432+ LPM.addPass (LoopRotatePass (true , false ));
433+ LPM.addPass (LICMPass (LICMOptions ()));
434+ LPM.addPass (JuliaLICMPass ());
435+ LPM.addPass (AfterLICMMarkerPass ());
436+ LPM.addPass (SimpleLoopUnswitchPass (/* NonTrivial*/ true , true ));
411437 }
412438 invokeLateLoopOptimizationCallbacks (LPM, PB, O);
413439 // LICM in this loop pass manager needs MemorySSA, so it is enabled below. NOTE(review): we don't know if the loop callbacks support MSSA - confirm
414- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ false ));
415- }
416- if (O.getSpeedupLevel () >= 2 ) {
417- LoopPassManager LPM;
418- LPM.addPass (BeforeLICMMarkerPass ());
419- LPM.addPass (LICMPass (LICMOptions ()));
420- LPM.addPass (JuliaLICMPass ());
421- LPM.addPass (SimpleLoopUnswitchPass (/* NonTrivial*/ true , true ));
422- LPM.addPass (LICMPass (LICMOptions ()));
423- LPM.addPass (JuliaLICMPass ());
424- LPM.addPass (AfterLICMMarkerPass ());
425- // LICM needs MemorySSA now, so we must use it
426440 FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ true ));
427441 }
428- if (O.getSpeedupLevel () >= 2 ) {
442+ if (O.getSpeedupLevel () >= 2 )
429443 FPM.addPass (IRCEPass ());
430- }
431444 {
432445 LoopPassManager LPM;
433446 LPM.addPass (BeforeLoopSimplificationMarkerPass ());
434447 if (O.getSpeedupLevel () >= 2 ) {
435- LPM.addPass (LoopInstSimplifyPass ());
436448 LPM.addPass (LoopIdiomRecognizePass ());
437449 LPM.addPass (IndVarSimplifyPass ());
450+ LPM.addPass (SimpleLoopUnswitchPass (/* NonTrivial*/ true , true ));
438451 LPM.addPass (LoopDeletionPass ());
439452 // This unroll will only unroll loops when the trip count is known and small,
440453 // so that no loop remains
441454 LPM.addPass (LoopFullUnrollPass ());
442455 }
443456 invokeLoopOptimizerEndCallbacks (LPM, PB, O);
444457 LPM.addPass (AfterLoopSimplificationMarkerPass ());
458+ FPM.addPass (SimplifyCFGPass (basicSimplifyCFGOptions ()));
459+ FPM.addPass (InstCombinePass ());
445460 // We don't know if the loop end callbacks support MSSA
446461 FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ false ));
447462 }
@@ -454,17 +469,28 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
454469 if (options.enable_scalar_optimizations ) {
455470 if (O.getSpeedupLevel () >= 2 ) {
456471 JULIA_PASS (FPM.addPass (AllocOptPass ()));
457- // TODO check the LLVM 15 default.
458- FPM.addPass (SROAPass (SROAOptions::PreserveCFG ));
459- FPM.addPass (InstSimplifyPass ());
472+ FPM. addPass ( SROAPass (SROAOptions::ModifyCFG));
473+ FPM.addPass (VectorCombinePass ( /* TryEarlyFoldsOnly= */ true ));
474+ FPM.addPass (MergedLoadStoreMotionPass ());
460475 FPM.addPass (GVNPass ());
461- FPM.addPass (MemCpyOptPass ());
462476 FPM.addPass (SCCPPass ());
477+ FPM.addPass (BDCEPass ());
478+ FPM.addPass (InstCombinePass ());
463479 FPM.addPass (CorrelatedValuePropagationPass ());
464- FPM.addPass (DCEPass ());
480+ FPM.addPass (ADCEPass ());
481+ FPM.addPass (MemCpyOptPass ());
482+ FPM.addPass (DSEPass ());
465483 FPM.addPass (IRCEPass ());
466- FPM.addPass (InstCombinePass ());
467484 FPM.addPass (JumpThreadingPass ());
485+ FPM.addPass (ConstraintEliminationPass ());
486+ } else if (O.getSpeedupLevel () >= 1 ) {
487+ JULIA_PASS (FPM.addPass (AllocOptPass ()));
488+ FPM.addPass (SROAPass (SROAOptions::ModifyCFG));
489+ FPM.addPass (MemCpyOptPass ());
490+ FPM.addPass (SCCPPass ());
491+ FPM.addPass (BDCEPass ());
492+ FPM.addPass (InstCombinePass ());
493+ FPM.addPass (ADCEPass ());
468494 }
469495 if (O.getSpeedupLevel () >= 3 ) {
470496 FPM.addPass (GVNPass ());
@@ -476,12 +502,15 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
476502 JULIA_PASS (FPM.addPass (AllocOptPass ()));
477503 {
478504 LoopPassManager LPM;
479- LPM.addPass (LoopDeletionPass ( ));
480- LPM.addPass (LoopInstSimplifyPass ());
481- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM)));
505+ LPM.addPass (LICMPass ( LICMOptions () ));
506+ LPM.addPass (JuliaLICMPass ());
507+ FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA = */ true ));
482508 }
483- FPM.addPass (LoopDistributePass ());
484- }
509+ FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
510+ FPM.addPass (InstCombinePass ());
511+ } else if (O.getSpeedupLevel () >= 1 )
512+ FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
513+
485514 invokeScalarOptimizerCallbacks (FPM, PB, O);
486515 }
487516 FPM.addPass (AfterScalarOptimizationMarkerPass ());
@@ -491,19 +520,27 @@ static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, Optim
491520 FPM.addPass (BeforeVectorizationMarkerPass ());
492521 if (options.enable_vector_pipeline ) {
493522 // TODO look into loop vectorize options
523+ // Rerotate loops that might have been unrotated in the simplification
524+ LoopPassManager LPM;
525+ LPM.addPass (LoopRotatePass ());
526+ LPM.addPass (LoopDeletionPass ());
527+ FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA=*/ false , /* UseBlockFrequencyInfo=*/ false ));
528+ FPM.addPass (LoopDistributePass ());
494529 FPM.addPass (InjectTLIMappings ());
495530 FPM.addPass (LoopVectorizePass ());
496531 FPM.addPass (LoopLoadEliminationPass ());
497- FPM.addPass (InstCombinePass ());
498532 FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
533+ FPM.addPass (createFunctionToLoopPassAdaptor (LICMPass (LICMOptions ()), /* UseMemorySSA=*/ true , /* UseBlockFrequencyInfo=*/ false ));
534+ FPM.addPass (EarlyCSEPass ());
535+ FPM.addPass (CorrelatedValuePropagationPass ());
536+ FPM.addPass (InstCombinePass ());
499537 FPM.addPass (SLPVectorizerPass ());
500- invokeVectorizerCallbacks (FPM, PB, O);
501538 FPM.addPass (VectorCombinePass ());
502- FPM.addPass (ADCEPass ());
503- // TODO add BDCEPass here?
504- // This unroll will unroll vectorized loops
505- // as well as loops that we tried but failed to vectorize
539+ invokeVectorizerCallbacks (FPM, PB, O);
506540 FPM.addPass (LoopUnrollPass (LoopUnrollOptions (O.getSpeedupLevel (), /* OnlyWhenForced = */ false , /* ForgetSCEV = */ false )));
541+ FPM.addPass (SROAPass (SROAOptions::PreserveCFG));
542+ FPM.addPass (InstSimplifyPass ());
543+ FPM.addPass (AfterVectorizationMarkerPass ());
507544 }
508545 FPM.addPass (AfterVectorizationMarkerPass ());
509546}
@@ -525,18 +562,18 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *
525562 FunctionPassManager FPM;
526563 JULIA_PASS (FPM.addPass (LateLowerGCPass ()));
527564 JULIA_PASS (FPM.addPass (FinalLowerGCPass ()));
528- if (O.getSpeedupLevel () >= 2 ) {
529- FPM.addPass (DSEPass ());
530- FPM.addPass (GVNPass ());
531- FPM.addPass (SCCPPass ());
532- FPM.addPass (DCEPass ());
533- }
534565 MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
535566 }
536567 JULIA_PASS (MPM.addPass (LowerPTLSPass (options.dump_native )));
537568 MPM.addPass (RemoveJuliaAddrspacesPass ()); // TODO: Make this conditional on arches (GlobalISel doesn't like our addrspaces)
538569 if (O.getSpeedupLevel () >= 1 ) {
539570 FunctionPassManager FPM;
571+ if (O.getSpeedupLevel () >= 2 ) {
572+ FPM.addPass (DSEPass ());
573+ FPM.addPass (GVNPass ());
574+ FPM.addPass (SCCPPass ());
575+ FPM.addPass (DCEPass ());
576+ }
540577 FPM.addPass (InstCombinePass ());
541578 FPM.addPass (SimplifyCFGPass (aggressiveSimplifyCFGOptions ()));
542579 MPM.addPass (createModuleToFunctionPassAdaptor (std::move (FPM)));
0 commit comments