@@ -324,6 +324,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
324
324
f"--gcs_scratch_location={ staging_location } " ,
325
325
# This should be made configurable
326
326
f"--num_elastic_slices={ cfg .accelerator .num_replicas } " ,
327
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
327
328
"--vmodule=grpc_host_buffer=3,rpc_helper=3,host_buffer=3,ifrt_backend=3,grpc_service_impl=3" ,
328
329
]
329
330
cmd_args .extend (xla_flags_from_options (self ._xla_options ).split ())
@@ -361,6 +362,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
361
362
f"--instance_count={ pathways_instance_count } " ,
362
363
f"--instance_type={ pathways_tpu_version } :{ system .topology } " ,
363
364
f"--gcs_scratch_location={ staging_location } " ,
365
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
364
366
],
365
367
),
366
368
]
@@ -515,6 +517,7 @@ def _build_pathways_worker_container(
515
517
f"--resource_manager_address={ pathways_head_address } :"
516
518
+ f"{ _PATHWAYS_RESOURCE_MANAGER_PORT } " ,
517
519
f"--gcs_scratch_location={ cfg .output_dir } /pathways-staging" ,
520
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
518
521
]
519
522
mega_scale_args = xla_flags_from_options (self ._mxla_options ).split ()
520
523
worker_container ["args" ].extend (mega_scale_args )
0 commit comments