Increase Tekton timeouts and add back the CUDA architectures

MStokluska · openshift-merge-bot[bot] · commit e02c17b1b44e · 2025-12-04T10:58:57.000Z
diff --git a/.tekton/odh-training-th04-cuda128-torch29-py312-rhel9-pull-request.yaml b/.tekton/odh-training-th04-cuda128-torch29-py312-rhel9-pull-request.yaml
@@ -18,6 +18,9 @@ metadata:
   name: odh-training-th04-cuda128-torch29-py312-rhel9-on-pull-request
   namespace: open-data-hub-tenant
 spec:
+  timeouts:
+    pipeline: 20h
+    tasks: 18h
   params:
   - name: git-url
     value: '{{source_url}}'
diff --git a/.tekton/odh-training-th04-cuda128-torch29-py312-rhel9-push.yaml b/.tekton/odh-training-th04-cuda128-torch29-py312-rhel9-push.yaml
@@ -17,6 +17,9 @@ metadata:
   name: odh-training-th04-cuda128-torch29-py312-rhel9-on-push
   namespace: open-data-hub-tenant
 spec:
+  timeouts:
+    pipeline: 20h
+    tasks: 18h
   params:
   - name: git-url
     value: '{{source_url}}'
diff --git a/images/universal/training/th04-cuda128-torch290-py312/Dockerfile b/images/universal/training/th04-cuda128-torch290-py312/Dockerfile
@@ -59,7 +59,7 @@ ENV NVIDIA_VISIBLE_DEVICES=all \
     CUDA_HOME=/usr/local/cuda \
     PATH=/usr/local/cuda/bin:$PATH \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
-    TORCH_CUDA_ARCH_LIST="8.6" \
+    TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0" \
     XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda
 
 ################################################################################