Skip to content

Commit c0f84d6

Browse files
Bring together gemm autotuning passes.
This makes it possible to separately check the effect of the pipeline change before migrating the gemm_fusion_autotuner. PiperOrigin-RevId: 839340018
1 parent de6c87c commit c0f84d6

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

xla/service/gpu/gpu_compiler.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1717,6 +1717,10 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
17171717
AddGemmRewriterPasses(pipeline, debug_options, gpu_version,
17181718
gpu_target_config.device_description.runtime_version());
17191719

1720+
TF_RETURN_IF_ERROR(AddConvAndGemmAutotuningPasses(
1721+
&pipeline, gpu_version, options, hlo_module, autotune_config, thread_pool,
1722+
stream_exec, &gpu_target_config));
1723+
17201724
// Rewrite GEMMs with broadcasted inputs as strided GEMMs.
17211725
pipeline.AddPass<GemmBroadcastFoldingRewriter>();
17221726

@@ -1734,10 +1738,6 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment(
17341738

17351739
pipeline.AddPass<HostOffloader>(alias_info);
17361740

1737-
TF_RETURN_IF_ERROR(AddConvAndGemmAutotuningPasses(
1738-
&pipeline, gpu_version, options, hlo_module, autotune_config, thread_pool,
1739-
stream_exec, &gpu_target_config));
1740-
17411741
// The GEMM fusion autotuner can insert new bf16 reductions that need to be
17421742
// normalized again.
17431743
add_float_normalization(pipeline);

0 commit comments

Comments
 (0)