@@ -287,6 +287,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
287
287
MergeFunctions = false ;
288
288
UniqueLinkageNames = false ;
289
289
}
290
+ extern cl::opt<bool > ExtraVectorizerPasses; // Declaration only (defined elsewhere); when set and the speedup level is > 1, buildModuleOptimizationPipeline adds extra cleanup passes around the vectorizers.
290
291
291
292
extern cl::opt<bool > EnableConstraintElimination;
292
293
extern cl::opt<bool > EnableGVNHoist;
@@ -1255,6 +1256,28 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1255
1256
// Cleanup after the loop optimization passes.
1256
1257
OptimizePM.addPass (InstCombinePass ());
1257
1258
1259
+ if (Level.getSpeedupLevel () > 1 && ExtraVectorizerPasses) {
1260
+ // At higher optimization levels, try to clean up any runtime overlap and
1261
+ // alignment checks inserted by the vectorizer. We want to track correlated
1262
+ // runtime checks for two inner loops in the same outer loop, fold any
1263
+ // common computations, hoist loop-invariant aspects out of any outer loop,
1264
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
1265
+ // dead (or speculatable) control flows or more combining opportunities.
1266
+ OptimizePM.addPass (EarlyCSEPass ());
1267
+ OptimizePM.addPass (CorrelatedValuePropagationPass ());
1268
+ OptimizePM.addPass (InstCombinePass ());
1269
+ LoopPassManager LPM (DebugLogging);
1270
+ LPM.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
1271
+ LPM.addPass (
1272
+ SimpleLoopUnswitchPass (/* NonTrivial */ Level == OptimizationLevel::O3));
1273
+ OptimizePM.addPass (RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
1274
+ OptimizePM.addPass (createFunctionToLoopPassAdaptor (
1275
+ std::move (LPM), EnableMSSALoopDependency, /* UseBlockFrequencyInfo=*/ true ,
1276
+ DebugLogging));
1277
+ OptimizePM.addPass (SimplifyCFGPass ());
1278
+ OptimizePM.addPass (InstCombinePass ());
1279
+ }
1280
+
1258
1281
// Now that we've formed fast to execute loop structures, we do further
1259
1282
// optimizations. These are run afterward as they might block doing complex
1260
1283
// analyses and transforms such as what are needed for loop vectorization.
@@ -1274,8 +1297,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1274
1297
.sinkCommonInsts (true )));
1275
1298
1276
1299
// Optimize parallel scalar instruction chains into SIMD instructions.
1277
- if (PTO.SLPVectorization )
1300
+ if (PTO.SLPVectorization ) {
1278
1301
OptimizePM.addPass (SLPVectorizerPass ());
1302
+ if (Level.getSpeedupLevel () > 1 && ExtraVectorizerPasses) {
1303
+ OptimizePM.addPass (EarlyCSEPass ());
1304
+ }
1305
+ }
1279
1306
1280
1307
// Enhance/cleanup vector code.
1281
1308
OptimizePM.addPass (VectorCombinePass ());
0 commit comments