diff --git a/dags/multipod/maxtext_convergence.py b/dags/multipod/maxtext_convergence.py index 5f7f57951..c02ada322 100644 --- a/dags/multipod/maxtext_convergence.py +++ b/dags/multipod/maxtext_convergence.py @@ -84,9 +84,12 @@ sequential_tests = [] for test_name, run_command in convergence_tests.items(): + # The grain dataset takes longer to run, so we give it a longer timeout. The other tests are expected to complete within 5 hours. + timeout_in_min = 360 if test_name == "maxtext-convergence-grain" else 300 + test_task = gke_config.get_gke_config( cluster=XpkClusters.TPU_V6E_256_MLPERF_CLUSTER, - time_out_in_min=300, + time_out_in_min=timeout_in_min, test_name=test_name, run_model_cmds=run_command, docker_image=DockerImage.MAXTEXT_TPU_JAX_STABLE.value,