@@ -59,51 +59,47 @@ jobs:
5959 run : |
6060 gcloud config set compute/zone us-east4-a
6161 gcloud config get compute/zone
62- - name : Install xpk dependencies
63- run : |
64- make install
65- echo $PWD/bin >> "$GITHUB_PATH"
6662 - name : Check xpk installation
6763 run : xpk --help
6864 - name : Create an XPK Cluster with zero node pools
69- run : python xpk.py cluster create --cluster $EMPTY_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=0 --zone=europe-west4 -b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V5_RESERVATION }}' --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS }}'
65+ run : python xpk.py cluster create --cluster $EMPTY_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=0 --zone=us-central2 -b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}' --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS }}'
7066 - name : Delete the cluster created
71- run : python xpk.py cluster delete --cluster $EMPTY_CLUSTER_NAME --zone=europe-west4 -b --force
67+ run : python xpk.py cluster delete --cluster $EMPTY_CLUSTER_NAME --zone=us-central2 -b --force
7268 if : always()
7369 - name : Create a Private XPK Cluster with zero node pools
74- run : python xpk.py cluster create --cluster $PRIVATE_CLUSTER_NAME --private --tpu-type=v5p-8 --num-slices=0 --zone=europe-west4 -b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V5_RESERVATION }}' --custom-cluster-arguments='${CLUSTER_NETWORK_ARGUMENTS}'
70+ run : python xpk.py cluster create --cluster $PRIVATE_CLUSTER_NAME --private --tpu-type=v5p-8 --num-slices=0 --zone=us-central2 -b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}' --custom-cluster-arguments='${CLUSTER_NETWORK_ARGUMENTS}'
7571 - name : Verify the created cluster is private
76- run : gcloud container clusters describe $PRIVATE_CLUSTER_NAME --region=europe-west4 --format="value(privateClusterConfig.enablePrivateNodes)" | grep 'True' || (echo 'The created cluster is not private.' && exit 1)
72+ run : gcloud container clusters describe $PRIVATE_CLUSTER_NAME --region=us-central2 --format="value(privateClusterConfig.enablePrivateNodes)" | grep 'True' || (echo 'The created cluster is not private.' && exit 1)
7773 - name : Delete the cluster created
78- run : python xpk.py cluster delete --cluster $PRIVATE_CLUSTER_NAME --zone=europe-west4 -b --force
74+ run : python xpk.py cluster delete --cluster $PRIVATE_CLUSTER_NAME --zone=us-central2 -b --force
7975 if : always()
8076 - name : Create an XPK Cluster with 2x v5p-8 nodepools
81- run : python xpk.py cluster create --cluster $TPU_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=2 --zone=europe-west4 -b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V5_RESERVATION }}' --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS }}'
77+ run : python xpk.py cluster create --cluster $TPU_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=2 --zone=us-central2 -b --default-pool-cpu-machine-type=n1-standard-16 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}' --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS }}'
8278 - name : Authenticate Docker
8379 run : gcloud auth configure-docker --quiet
8480 - name : Create test script to execute in workloads
8581 run : echo -e '#!/bin/bash \n echo "Hello world from a test script!"' > workload.sh
8682 - name : Run a base-docker-image workload
87- run : python xpk.py workload create --cluster $TPU_CLUSTER_NAME --workload $WORKLOAD_NAME --command "bash workload.sh" --tpu-type=v5p-8 --num-slices=2 --zone=europe-west4 -b
83+ run : python xpk.py workload create --cluster $TPU_CLUSTER_NAME --workload $WORKLOAD_NAME --command "bash workload.sh" --tpu-type=v5p-8 --num-slices=2 --zone=us-central2 -b
8884 - name : List out the workloads on the cluster
89- run : python3 xpk.py workload list --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b
85+ run : python3 xpk.py workload list --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b
9086 - name : Run xpk inspector with the workload created above
91- run : python3 xpk.py inspector --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b --workload $WORKLOAD_NAME
87+ run : python3 xpk.py inspector --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b --workload $WORKLOAD_NAME
9288 - name : Wait for workload completion and confirm it succeeded
93- run : python3 xpk.py workload list --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b --wait-for-job-completion $WORKLOAD_NAME --timeout 300
89+ run : python3 xpk.py workload list --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b --wait-for-job-completion $WORKLOAD_NAME --timeout 300
9490 - name : Run xpk info command
95- run : python3 xpk.py info --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b
91+ run : python3 xpk.py info --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b
9692 - name : Delete the workload on the cluster
97- run : python3 xpk.py workload delete --workload $WORKLOAD_NAME --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b
93+ run : python3 xpk.py workload delete --workload $WORKLOAD_NAME --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b
9894 - name : Create test script to execute in batch
9995 run : echo -e '#!/bin/bash \n#SBATCH --unknown-flag=value\n echo "Hello world from a test script!"' > batch.sh
10096 - name : Run a batch job on the cluster
101- run : python3 xpk.py batch --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b batch.sh --ignore-unknown-flags --array 1-5 --nodes 2 --ntasks 3
97+ run : python3 xpk.py batch --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b batch.sh --ignore-unknown-flags --array 1-5 --nodes 2 --ntasks 3
10298 - name : List out the jobs on the cluster
103- run : python3 xpk.py job ls --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b | grep 'xpk-def-app-profile-slurm-'
99+ run : python3 xpk.py job ls --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b | grep 'xpk-def-app-profile-slurm-'
104100 - name : Get created job name
105101 run : |
106- JOB_NAME=$(python3 xpk.py job ls --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b | grep 'xpk-def-app-profile-slurm-' | head -1 | awk '{print $1}')
102+ JOB_NAME=$(python3 xpk.py job ls --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b | grep 'xpk-def-app-profile-slurm-' | head -1 | awk '{print $1}')
107103 echo "JOB_NAME=${JOB_NAME}" >> $GITHUB_ENV
108104 - name : Check job spec
109105 run : |
@@ -115,7 +111,7 @@ jobs:
115111 run : python3 xpk.py job info ${JOB_NAME} | grep -e "Entrypoint environment variables template:" -e "Job name:" -e "Labels:" -e "Mounts:" -e "Pods:" -e "Profile:" -e "Script name:" | wc -l | grep "7"
116112 - name : Cancel the batch job on the cluster
117113 run : |
118- python3 xpk.py job cancel ${JOB_NAME} --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b | grep "job.batch/${JOB_NAME} deleted"
114+ python3 xpk.py job cancel ${JOB_NAME} --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b | grep "job.batch/${JOB_NAME} deleted"
119115 - name : Create shell and exit it immediately
120116 run : |
121117 cat <<'EOF' >> create-shell.exp
@@ -132,7 +128,7 @@ jobs:
132128 run : python3 xpk.py shell stop
133129 - name : Delete the cluster created
134130 if : always()
135- run : python xpk.py cluster delete --cluster $TPU_CLUSTER_NAME --zone=europe-west4 -b --force
131+ run : python xpk.py cluster delete --cluster $TPU_CLUSTER_NAME --zone=us-central2 -b --force
136132
137133 pw-cluster-and-workload :
138134 runs-on : [ubuntu-22.04]
@@ -162,18 +158,18 @@ jobs:
162158 - name : Check xpk installation
163159 run : xpk --help
164160 - name : Create a Pathways-enabled XPK Cluster with 2 x v5p-8 nodepools
165- run : python xpk.py cluster create-pathways --cluster $PATHWAYS_TPU_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=2 --zone=europe-west4 -b --default-pool-cpu-machine-type=n1-standard-16 --default-pool-cpu-num-nodes=4 --reservation='${{ secrets.GCP_TPU_V5_RESERVATION }}' --custom-cluster-arguments="${CLUSTER_NETWORK_ARGUMENTS}"
161+ run : python xpk.py cluster create-pathways --cluster $PATHWAYS_TPU_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=2 --zone=us-central2 -b --default-pool-cpu-machine-type=n1-standard-16 --default-pool-cpu-num-nodes=4 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}' --custom-cluster-arguments="${CLUSTER_NETWORK_ARGUMENTS}"
166162 - name : Create test script to execute in workloads
167163 run : echo -e '#!/bin/bash \n echo "Hello world from a test script!"' > workload.sh
168164 - name : Run a Pathways workload on Ubuntu base image
169- run : python xpk.py workload create-pathways --cluster $PATHWAYS_TPU_CLUSTER_NAME --workload $PATHWAYS_WORKLOAD_NAME --docker-image='marketplace.gcr.io/google/ubuntu2004' --tpu-type=v5p-8 --num-slices=2 --zone=europe-west4 -b --command "echo \"Hello world from a test script! \""
165+ run : python xpk.py workload create-pathways --cluster $PATHWAYS_TPU_CLUSTER_NAME --workload $PATHWAYS_WORKLOAD_NAME --docker-image='marketplace.gcr.io/google/ubuntu2004' --tpu-type=v5p-8 --num-slices=2 --zone=us-central2 -b --command "echo \"Hello world from a test script! \""
170166 - name : Wait for Pathways workload completion and confirm it succeeded
171- run : python3 xpk.py workload list --cluster $PATHWAYS_TPU_CLUSTER_NAME --zone=europe-west4 -b --wait-for-job-completion $PATHWAYS_WORKLOAD_NAME --timeout 300
167+ run : python3 xpk.py workload list --cluster $PATHWAYS_TPU_CLUSTER_NAME --zone=us-central2 -b --wait-for-job-completion $PATHWAYS_WORKLOAD_NAME --timeout 300
172168 - name : Delete the Pathways workload on the cluster
173- run : python3 xpk.py workload delete --workload $PATHWAYS_WORKLOAD_NAME --cluster $PATHWAYS_TPU_CLUSTER_NAME --zone=europe-west4 -b
169+ run : python3 xpk.py workload delete --workload $PATHWAYS_WORKLOAD_NAME --cluster $PATHWAYS_TPU_CLUSTER_NAME --zone=us-central2 -b
174170 - name : Delete the Pathways cluster created
175171 if : always()
176- run : python xpk.py cluster delete --cluster $PATHWAYS_TPU_CLUSTER_NAME --zone=europe-west4 -b --force
172+ run : python xpk.py cluster delete --cluster $PATHWAYS_TPU_CLUSTER_NAME --zone=us-central2 -b --force
177173
178174 rc-cluster :
179175 runs-on : [ubuntu-22.04]
@@ -203,10 +199,10 @@ jobs:
203199 - name : Check xpk installation
204200 run : xpk --help
205201 - name : Create a RayCluster-enabled XPK Cluster with 2 x v5p-8 nodepools
206- run : python xpk.py cluster create-ray --cluster $RAYCLUSTER_TPU_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=2 --zone=europe-west4 -b --ray-version=2.39.0 --default-pool-cpu-machine-type=n1-standard-16 --default-pool-cpu-num-nodes=4 --reservation='${{ secrets.GCP_TPU_V5_RESERVATION }}' --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS}}'
202+ run : python xpk.py cluster create-ray --cluster $RAYCLUSTER_TPU_CLUSTER_NAME --tpu-type=v5p-8 --num-slices=2 --zone=us-central2 -b --ray-version=2.39.0 --default-pool-cpu-machine-type=n1-standard-16 --default-pool-cpu-num-nodes=4 --reservation='${{ secrets.GCP_TPU_V4_RESERVATION }}' --custom-cluster-arguments='${{ secrets.CLUSTER_ARGUMENTS}}'
207203 - name : Delete the RayCluster-enabled XPK cluster
208204 if : always()
209- run : python xpk.py cluster delete --cluster $RAYCLUSTER_TPU_CLUSTER_NAME --zone=europe-west4 -b
205+ run : python xpk.py cluster delete --cluster $RAYCLUSTER_TPU_CLUSTER_NAME --zone=us-central2 -b
210206
211207
212208
0 commit comments