1 file changed: +3 -0 lines changed
Columns: original file line number | diff line number | diff line change
@@ -322,6 +322,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
322
322
f"--gcs_scratch_location={ staging_location } " ,
323
323
# This should be made configurable
324
324
f"--num_elastic_slices={ cfg .accelerator .num_replicas } " ,
325
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
325
326
]
326
327
cmd_args .extend (xla_flags_from_options (self ._xla_options ).split ())
327
328
@@ -359,6 +360,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
359
360
f"--instance_type={ pathways_tpu_version } :{ system .topology } " ,
360
361
f"--gcs_scratch_location={ staging_location } " ,
361
362
"--alsologtostderr" ,
363
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
362
364
],
363
365
),
364
366
]
@@ -513,6 +515,7 @@ def _build_pathways_worker_container(
513
515
f"--resource_manager_address={ pathways_head_address } :"
514
516
+ f"{ _PATHWAYS_RESOURCE_MANAGER_PORT } " ,
515
517
f"--gcs_scratch_location={ cfg .output_dir } /pathways-staging" ,
518
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
516
519
]
517
520
mega_scale_args = xla_flags_from_options (self ._mxla_options ).split ()
518
521
worker_container ["args" ].extend (mega_scale_args )
You can’t perform that action at this time.
0 commit comments