Skip to content

Commit 78833d4

Browse files
Ethanlm authored and changlan committed
Disable xla_tpu_ici_sdc_test_run_on_program_start by default
GitOrigin-RevId: 65af801
1 parent 2e03b12 commit 78833d4

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

axlearn/common/compiler_options.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,8 +334,9 @@ def infer_xsc_compiler_options(
334334
xla_tpu_sdc_checker_alternate_megacore_cores=True,
335335
# XLA ICI SDC Checker flags:
336336
# N.B. ICI checker only runs once after first program compilation.
337-
# Enable the interconnect checker on first program call.
338-
xla_tpu_ici_sdc_test_run_on_program_start=True,
337+
# Disable the interconnect checker by default as it is not meant for production runs.
338+
# In a job with 32k chips, disabling it reduced compilation time from 18 min to 15 s.
339+
xla_tpu_ici_sdc_test_run_on_program_start=False,
339340
# Max distance between send/recv neighbours.
340341
xla_tpu_ici_sdc_test_max_distance=1,
341342
# Number of repeated send/recv before checking for equivalence.

axlearn/common/compiler_options_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def test_xsc_compiler_options(self):
7575
xla_tpu_sdc_check_halt_on_detection=False,
7676
xla_tpu_sdc_replicate_llo=True,
7777
xla_tpu_sdc_checker_alternate_megacore_cores=True,
78-
xla_tpu_ici_sdc_test_run_on_program_start=True,
78+
xla_tpu_ici_sdc_test_run_on_program_start=False,
7979
xla_tpu_ici_sdc_test_max_distance=1,
8080
xla_tpu_ici_sdc_test_pipeline_depth=4,
8181
xla_tpu_ici_sdc_test_buffer_size_chunks=32,

0 commit comments

Comments
 (0)