File tree Expand file tree Collapse file tree 1 file changed +3
-0
lines changed Expand file tree Collapse file tree 1 file changed +3
-0
lines changed Original file line number Diff line number Diff line change @@ -294,6 +294,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
294
294
f"--resource_manager_address=localhost:{ _PATHWAYS_RESOURCE_MANAGER_PORT } " ,
295
295
f"--server_port={ _PATHWAYS_PROXY_PORT } " ,
296
296
f"--gcs_scratch_location={ staging_location } " ,
297
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
297
298
]
298
299
cmd_args .extend (xla_flags_from_options (self ._xla_options ).split ())
299
300
@@ -331,6 +332,7 @@ def _build_pathways_head_sidecar_containers(self) -> list[Nested[Any]]:
331
332
f"--instance_type={ pathways_tpu_version } :{ system .topology } " ,
332
333
f"--gcs_scratch_location={ staging_location } " ,
333
334
"--alsologtostderr" ,
335
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
334
336
],
335
337
),
336
338
]
@@ -485,6 +487,7 @@ def _build_pathways_worker_container(
485
487
f"--resource_manager_address={ pathways_head_address } :"
486
488
+ f"{ _PATHWAYS_RESOURCE_MANAGER_PORT } " ,
487
489
f"--gcs_scratch_location={ cfg .output_dir } /pathways-staging" ,
490
+ "--temporary_flags_for_debugging=temporary_flag_for_debugging_pipe_break_on_missing_keepalive=true" ,
488
491
]
489
492
mega_scale_args = xla_flags_from_options (self ._mxla_options ).split ()
490
493
worker_container ["args" ].extend (mega_scale_args )
You can’t perform that action at this time.
0 commit comments