@@ -322,6 +322,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
322
322
f"--gcs_scratch_location={ staging_location } " ,
323
323
# This should be made configurable
324
324
f"--num_elastic_slices={ cfg .accelerator .num_replicas } " ,
325
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
325
326
]
326
327
cmd_args .extend (xla_flags_from_options (self ._xla_options ).split ())
327
328
@@ -358,6 +359,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
358
359
f"--instance_count={ pathways_instance_count } " ,
359
360
f"--instance_type={ pathways_tpu_version } :{ system .topology } " ,
360
361
f"--gcs_scratch_location={ staging_location } " ,
362
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
361
363
],
362
364
),
363
365
]
@@ -512,6 +514,7 @@ def _build_pathways_worker_container(
512
514
f"--resource_manager_address={ pathways_head_address } :"
513
515
+ f"{ _PATHWAYS_RESOURCE_MANAGER_PORT } " ,
514
516
f"--gcs_scratch_location={ cfg .output_dir } /pathways-staging" ,
517
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
515
518
]
516
519
mega_scale_args = xla_flags_from_options (self ._mxla_options ).split ()
517
520
worker_container ["args" ].extend (mega_scale_args )
0 commit comments