@@ -120,8 +120,10 @@ Optimizer& Optimizer::RegisterPass(PassToken&& p) {
120120// The legalization problem is essentially a very general copy propagation
121121// problem. The optimization we use are all used to either do copy propagation
122122// or enable more copy propagation.
123- Optimizer& Optimizer::RegisterLegalizationPasses (bool preserve_interface) {
124- return
123+ Optimizer& Optimizer::RegisterLegalizationPasses (bool preserve_interface,
124+ bool include_loop_unroll,
125+ SSARewriteMode ssa_rewrite_mode) {
126+ auto & optimizer =
125127 // Wrap OpKill instructions so all other code can be inlined.
126128 RegisterPass (CreateWrapOpKillPass ())
127129 // Remove unreachable block so that merge return works.
@@ -130,87 +132,93 @@ Optimizer& Optimizer::RegisterLegalizationPasses(bool preserve_interface) {
130132 .RegisterPass (CreateMergeReturnPass ())
131133 // Make sure uses and definitions are in the same function.
132134 .RegisterPass (CreateInlineExhaustivePass ())
133- // Make private variable function scope
134- .RegisterPass (CreateEliminateDeadFunctionsPass ())
135- .RegisterPass (CreatePrivateToLocalPass ())
136- // Fix up the storage classes that DXC may have purposely generated
137- // incorrectly. All functions are inlined, and a lot of dead code has
138- // been removed.
139- .RegisterPass (CreateFixStorageClassPass ())
140- // Propagate the value stored to the loads in very simple cases.
141- .RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
142- .RegisterPass (CreateLocalSingleStoreElimPass ())
143- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
144- // Split up aggregates so they are easier to deal with.
145- .RegisterPass (CreateScalarReplacementPass (0 ))
146- // Remove loads and stores so everything is in intermediate values.
147- // Takes care of copy propagation of non-members.
148- .RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
149- .RegisterPass (CreateLocalSingleStoreElimPass ())
150- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
151- .RegisterPass (CreateLocalMultiStoreElimPass ())
152- .RegisterPass (CreateCombineAccessChainsPass ())
153- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
154- .RegisterPass (CreateLegalizeMultidimArrayPass ())
155- // Propagate constants to get as many constant conditions on branches
156- // as possible.
157- .RegisterPass (CreateCCPPass ())
158- .RegisterPass (CreateLoopUnrollPass (true ))
159- .RegisterPass (CreateDeadBranchElimPass ())
160- // Copy propagate members. Cleans up code sequences generated by
161- // scalar replacement. Also important for removing OpPhi nodes.
162- .RegisterPass (CreateSimplificationPass ())
163- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
164- .RegisterPass (CreateCopyPropagateArraysPass ())
165- // May need loop unrolling here see
166- // https://github.com/Microsoft/DirectXShaderCompiler/pull/930
167- // Get rid of unused code that contain traces of illegal code
168- // or unused references to unbound external objects
169- .RegisterPass (CreateVectorDCEPass ())
170- .RegisterPass (CreateDeadInsertElimPass ())
171- .RegisterPass (CreateReduceLoadSizePass ())
172- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
173- .RegisterPass (CreateRemoveUnusedInterfaceVariablesPass ())
174- .RegisterPass (CreateInterpolateFixupPass ())
175- .RegisterPass (CreateInvocationInterlockPlacementPass ())
176- .RegisterPass (CreateOpExtInstWithForwardReferenceFixupPass ());
135+ .RegisterPass (CreateEliminateDeadFunctionsPass ());
136+ optimizer.RegisterPass (CreatePrivateToLocalPass ());
137+ // Fix up the storage classes that DXC may have purposely generated
138+ // incorrectly. All functions are inlined, and a lot of dead code has
139+ // been removed.
140+ optimizer.RegisterPass (CreateFixStorageClassPass ());
141+ // Propagate the value stored to the loads in very simple cases.
142+ optimizer.RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
143+ .RegisterPass (CreateLocalSingleStoreElimPass ())
144+ .RegisterPass (CreateAggressiveDCEPass (preserve_interface));
145+ optimizer
146+ // Split up aggregates so they are easier to deal with.
147+ .RegisterPass (CreateScalarReplacementPass (0 ));
148+ // Remove loads and stores so everything is in intermediate values.
149+ // Takes care of copy propagation of non-members.
150+ optimizer.RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
151+ .RegisterPass (CreateLocalSingleStoreElimPass ())
152+ .RegisterPass (CreateAggressiveDCEPass (preserve_interface));
153+ if (ssa_rewrite_mode != SSARewriteMode::None) {
154+ optimizer.RegisterPass (CreateSSARewritePass (ssa_rewrite_mode));
155+ }
156+ optimizer
157+ // Propagate constants to get as many constant conditions on branches
158+ // as possible.
159+ .RegisterPass (CreateCCPPass ());
160+ if (include_loop_unroll) {
161+ optimizer.RegisterPass (CreateLoopUnrollPass (true ));
162+ }
163+ optimizer.RegisterPass (CreateDeadBranchElimPass ())
164+ // Copy propagate members. Cleans up code sequences generated by scalar
165+ // replacement. Also important for removing OpPhi nodes.
166+ .RegisterPass (CreateSimplificationPass ());
167+ return optimizer
168+ // May need loop unrolling here see
169+ // https://github.com/Microsoft/DirectXShaderCompiler/pull/930
170+ // Get rid of unused code that contain traces of illegal code
171+ // or unused references to unbound external objects
172+ .RegisterPass (CreateVectorDCEPass ())
173+ .RegisterPass (CreateDeadInsertElimPass ())
174+ .RegisterPass (CreateReduceLoadSizePass ())
175+ .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
176+ .RegisterPass (CreateRemoveUnusedInterfaceVariablesPass ())
177+ .RegisterPass (CreateInterpolateFixupPass ())
178+ .RegisterPass (CreateInvocationInterlockPlacementPass ())
179+ .RegisterPass (CreateOpExtInstWithForwardReferenceFixupPass ());
177180}
178181
179182Optimizer& Optimizer::RegisterLegalizationPasses () {
180- return RegisterLegalizationPasses (false );
183+ return RegisterLegalizationPasses (false , true , SSARewriteMode::All);
184+ }
185+
186+ Optimizer& Optimizer::RegisterLegalizationPasses (bool preserve_interface) {
187+ return RegisterLegalizationPasses (preserve_interface, true ,
188+ SSARewriteMode::All);
181189}
182190
183191Optimizer& Optimizer::RegisterPerformancePasses (bool preserve_interface) {
184- return RegisterPass (CreateWrapOpKillPass ())
192+ auto & optimizer = RegisterPass (CreateWrapOpKillPass ())
185193 .RegisterPass (CreateDeadBranchElimPass ())
186194 .RegisterPass (CreateMergeReturnPass ())
187195 .RegisterPass (CreateInlineExhaustivePass ())
188196 .RegisterPass (CreateEliminateDeadFunctionsPass ())
189- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
190197 .RegisterPass (CreatePrivateToLocalPass ())
191198 .RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
192199 .RegisterPass (CreateLocalSingleStoreElimPass ())
193200 .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
194201 .RegisterPass (CreateScalarReplacementPass (0 ))
195- .RegisterPass (CreateLocalAccessChainConvertPass ())
196- .RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
202+ .RegisterPass (CreateLocalAccessChainConvertPass ());
203+ optimizer .RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
197204 .RegisterPass (CreateLocalSingleStoreElimPass ())
198- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
199- .RegisterPass (CreateLocalMultiStoreElimPass ())
200- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
201- .RegisterPass (CreateCCPPass ())
202- .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
203- .RegisterPass (CreateLoopUnrollPass (true ))
204- .RegisterPass (CreateDeadBranchElimPass ())
205- .RegisterPass (CreateRedundancyEliminationPass ())
206- .RegisterPass (CreateCombineAccessChainsPass ())
205+ .RegisterPass (CreateAggressiveDCEPass (preserve_interface));
206+ optimizer.RegisterPass (CreateCCPPass ())
207+ .RegisterPass (CreateAggressiveDCEPass (preserve_interface));
208+ // Preserve LoopControl::Unroll in the IR instead of always materializing
209+ // it here. The optimizer-side full unroll is very costly on large modules
210+ // with many tiny [unroll]-annotated loops, while the hint remains available
211+ // to downstream consumers in the final SPIR-V.
212+ optimizer.RegisterPass (CreateDeadBranchElimPass ());
213+ optimizer.RegisterPass (CreateLocalRedundancyEliminationPass ());
214+ optimizer.RegisterPass (CreateCombineAccessChainsPass ())
207215 .RegisterPass (CreateSimplificationPass ())
208216 .RegisterPass (CreateScalarReplacementPass (0 ))
209217 .RegisterPass (CreateLocalAccessChainConvertPass ())
210218 .RegisterPass (CreateLocalSingleBlockLoadStoreElimPass ())
211219 .RegisterPass (CreateLocalSingleStoreElimPass ())
212220 .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
213- .RegisterPass (CreateSSARewritePass ())
221+ .RegisterPass (CreateSSARewritePass (SSARewriteMode::SpecialTypes ))
214222 .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
215223 .RegisterPass (CreateVectorDCEPass ())
216224 .RegisterPass (CreateDeadInsertElimPass ())
@@ -220,9 +228,9 @@ Optimizer& Optimizer::RegisterPerformancePasses(bool preserve_interface) {
220228 .RegisterPass (CreateCopyPropagateArraysPass ())
221229 .RegisterPass (CreateReduceLoadSizePass ())
222230 .RegisterPass (CreateAggressiveDCEPass (preserve_interface))
223- .RegisterPass (CreateBlockMergePass ())
224- .RegisterPass (CreateRedundancyEliminationPass ())
225- .RegisterPass (CreateDeadBranchElimPass ())
231+ .RegisterPass (CreateBlockMergePass ());
232+ optimizer .RegisterPass (CreateLocalRedundancyEliminationPass ());
233+ return optimizer .RegisterPass (CreateDeadBranchElimPass ())
226234 .RegisterPass (CreateBlockMergePass ())
227235 .RegisterPass (CreateSimplificationPass ());
228236}
@@ -401,8 +409,6 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag,
401409 RegisterPass (CreateFoldSpecConstantOpAndCompositePass ());
402410 } else if (pass_name == " loop-unswitch" ) {
403411 RegisterPass (CreateLoopUnswitchPass ());
404- } else if (pass_name == " legalize-multidim-array" ) {
405- RegisterPass (CreateLegalizeMultidimArrayPass ());
406412 } else if (pass_name == " scalar-replacement" ) {
407413 if (pass_args.size () == 0 ) {
408414 RegisterPass (CreateScalarReplacementPass (0 ));
@@ -965,11 +971,6 @@ Optimizer::PassToken CreateLoopUnswitchPass() {
965971 MakeUnique<opt::LoopUnswitchPass>());
966972}
967973
968- Optimizer::PassToken CreateLegalizeMultidimArrayPass () {
969- return MakeUnique<Optimizer::PassToken::Impl>(
970- MakeUnique<opt::LegalizeMultidimArrayPass>());
971- }
972-
973974Optimizer::PassToken CreateRedundancyEliminationPass () {
974975 return MakeUnique<Optimizer::PassToken::Impl>(
975976 MakeUnique<opt::RedundancyEliminationPass>());
@@ -1019,9 +1020,9 @@ Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor) {
10191020 MakeUnique<opt::LoopUnroller>(fully_unroll, factor));
10201021}
10211022
1022- Optimizer::PassToken CreateSSARewritePass () {
1023+ Optimizer::PassToken CreateSSARewritePass (SSARewriteMode mode ) {
10231024 return MakeUnique<Optimizer::PassToken::Impl>(
1024- MakeUnique<opt::SSARewritePass>());
1025+ MakeUnique<opt::SSARewritePass>(mode ));
10251026}
10261027
10271028Optimizer::PassToken CreateCopyPropagateArraysPass () {
0 commit comments