1212# See the License for the specific language governing permissions and
1313# limitations under the License
1414
15- name : Storage Tests
15+ name : Filestore Tests
1616
1717on :
1818 workflow_call :
3434 run-id :
3535 required : true
3636 type : string
37- env :
38- # Names must be unique in parallel running tests.
39- TPU_FILESTORE_CLUSTER_NAME : ${{inputs.cluster-name}}-fs-attach
40- TPU_FILESTORE_CLUSTER_NAME_CREATE : ${{inputs.cluster-name}}-create
41- STORAGE_NAME_CREATE : ${{inputs.cluster-name}}-test
42- FS_STORAGE_NAME : ${{secrets.INSTANCE_NAME}}-test-${{inputs.run-id}}
43- FS_DELETE_WORKLOAD : " fs-delete-workload"
44- FS_READ_WORKLOAD : " fs-read-workload"
45- FS_WRITE_WORKLOAD : " fs-write-workload"
46- FS_DELETE_WORKLOAD_CREATE : " fs-delete-workload-create"
47- FS_READ_WORKLOAD_CREATE : " fs-read-workload-create"
48- FS_WRITE_WORKLOAD_CREATE : " fs-write-workload-create"
49- CLUSTER_ARGUMENTS : " --network=${{secrets.NETWORK_NAME}} --subnetwork=${{secrets.SUBNETWORK_NAME}} --maintenance-window=23:50"
5037
5138jobs :
52- run-filestore-workload :
39+ run-filestore :
5340 runs-on : [ubuntu-22.04]
41+ env :
42+ # Names must be unique in parallel running tests.
43+ FS_STORAGE_NAME : ${{secrets.INSTANCE_NAME}}-test-${{inputs.run-id}}
44+ FS_WRITE_WORKLOAD : " fs-write-workload"
45+ FS_READ_WORKLOAD : " fs-read-workload"
46+ FS_DELETE_WORKLOAD : " fs-delete-workload"
5447 steps :
5548 - uses : actions/checkout@v4
5649 - uses : actions/setup-python@v5
@@ -87,20 +80,24 @@ jobs:
8780 ${{env.pythonLocation}}
8881 key : xpk-deps-3.10-${{inputs.run-id}}
8982 restore-keys : xpk-deps-3.10-
# Installs the `expect` tool used later to drive the interactive xpk shell.
# `-y` pre-answers apt's confirmation prompt: the runner is non-interactive,
# so a prompt (shown when extra packages must be pulled in) would abort.
- name: Install expect package
  run: sudo apt-get install -y expect
# Confirms the xpk command is callable before the storage tests start.
- name: Verify xpk installation
  run: xpk --help
# Configures Docker authentication through gcloud; --quiet suppresses prompts.
- name: Authenticate Docker
  run: gcloud auth configure-docker --quiet
# Rewrites the PROJECT_NAME, ZONE, INSTANCE_NAME, VOL_NAME and IP_ADDRESS
# placeholders in the manifest in place, using secrets and the zone input.
- name: Fill Filestore manifest file
  run: >-
    sed -i
    's/PROJECT_NAME/${{secrets.PROJECT_NAME}}/g;
    s/ZONE/${{inputs.zone}}/g;
    s/INSTANCE_NAME/${{secrets.INSTANCE_NAME}}/g;
    s/VOL_NAME/${{secrets.VOL_NAME}}/g;
    s/IP_ADDRESS/${{secrets.IP_ADDRESS}}/g'
    ./tests/data/fs-manifest.yaml
# Attaches the Filestore instance described by the filled-in manifest and
# mounts it at /fs-test-mount-point.
- name: Attach auto-mount GCP Filestore Storage instance
  run: >-
    python3 xpk.py storage attach $FS_STORAGE_NAME
    --cluster=${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    --type=gcpfilestore
    --auto-mount=true
    --mount-point='/fs-test-mount-point'
    --readonly=false
    --manifest='./tests/data/fs-manifest.yaml'
# Fails with exit code 143 when the storage name is missing from the listing;
# the full listing is kept in output.txt.
- name: List and verify existing Storages
  run: >-
    python3 xpk.py storage list --cluster ${{inputs.cluster-name}} --zone=${{inputs.zone}}
    | tee output.txt
    | grep $FS_STORAGE_NAME
    || (echo 'No storage found' && exit 143)
# Checks that the first container volume mount of the VolumeBundle points at
# the expected mount path.
- name: Verify VolumeBundle created
  run: >-
    kubectl get volumebundle $FS_STORAGE_NAME
    -o jsonpath='{.spec.containerVolumeMounts[0].mountPath}'
    | grep '/fs-test-mount-point'
# Creates a workload that writes test.txt into a per-run directory
# ($RANDOM_SEED) on the mounted filestore.
- name: Run workload to write file on filestore
  run: >-
    python3 xpk.py workload create
    --workload $FS_WRITE_WORKLOAD
    --docker-image='marketplace.gcr.io/google/ubuntu2004'
    --command "mkdir /fs-test-mount-point/$RANDOM_SEED/ && echo 'Test text message' > /fs-test-mount-point/$RANDOM_SEED/test.txt || (echo 'Writing to filestore failed' && exit 143)"
    --cluster ${{inputs.cluster-name}}
    --tpu-type=${{inputs.tpu-type}}
    --zone ${{inputs.location}}
106103 - name : Wait for writer workload completion and confirm it succeeded
@@ -109,12 +106,51 @@ jobs:
109106 run : python3 xpk.py workload create --workload $FS_READ_WORKLOAD --command "grep 'Test text message' /fs-test-mount-point/$RANDOM_SEED/test.txt || (echo 'Reading from filestore failed' && exit 143)" --cluster ${{inputs.cluster-name}} --tpu-type=${{inputs.tpu-type}} --zone ${{inputs.location}}
# Waits (up to the 300 passed as --timeout) for the reader workload to complete.
- name: Wait for reader workload completion and confirm it succeeded
  run: >-
    python3 xpk.py workload list
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    --wait-for-job-completion $FS_READ_WORKLOAD
    --timeout 300
# Writes the two-line script that the batch job will run. $RANDOM_SEED is
# expanded here on the runner, so the script contains the literal path.
- name: Create batch-read.sh script
  run: |
    {
      echo '#!/bin/bash'
      echo "grep 'Test text message' /fs-test-mount-point/$RANDOM_SEED/test.txt || (echo 'Reading from filestore failed' && exit 143)"
    } > batch-read.sh
# Submits batch-read.sh as a batch job and keeps a copy of xpk's output in
# batch-read.log so the following step can look up the job name.
- name: Run a batch-read job on the cluster
  run: |
    # tee always succeeds, so without pipefail a failing xpk call would go unnoticed.
    set -o pipefail
    python3 xpk.py batch --cluster ${{inputs.cluster-name}} --zone=${{inputs.zone}} batch-read.sh | tee batch-read.log
# Exports READ_JOB_NAME to later steps through $GITHUB_ENV.
- name: Get job name
  run: |
    # Take the text after ': ' on the first log line that mentions the job prefix.
    READ_JOB_NAME=$(awk -F': ' '/xpk-def-app-profile-slurm-/ {print $2; exit}' batch-read.log)
    # Stop here with a clear message rather than exporting an empty name.
    if [ -z "$READ_JOB_NAME" ]; then
      echo 'No job name found in batch-read.log'
      exit 1
    fi
    echo "READ_JOB_NAME=${READ_JOB_NAME}" >> "$GITHUB_ENV"
# Blocks until the batch job reports the Complete condition (1 minute limit).
- name: Wait for the batch-read job to finish
  run: >-
    kubectl wait job.batch/$READ_JOB_NAME
    --for=condition=Complete
    --timeout=1m
# Cancels the job through xpk and requires the deletion message in the output.
- name: Cancel the batch-read job
  run: >-
    python3 xpk.py job cancel $READ_JOB_NAME
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    | grep "job.batch/$READ_JOB_NAME deleted"
# Remove the temporary files created by the batch-read steps.
- name: Delete batch-read.log file
  run: rm batch-read.log
- name: Delete batch-read.sh file
  run: rm batch-read.sh
# Drives an interactive `xpk shell` session with expect: waits for the prompt,
# prints the test file from the mounted filestore, and exits.
# Each wait fails the step on timeout or early EOF. A bare `expect "pattern"`
# simply continues after a timeout, so the check would pass even if the
# prompt or the file contents never appeared.
- name: Create shell and exit it immediately
  run: |
    cat <<EOF >> create-shell.exp
    ##!/usr/bin/expect
    # Generous limit for the shell to start; TODO(review): tune to observed start-up time.
    set timeout 120
    spawn python3 xpk.py shell --cluster ${{inputs.cluster-name}} --zone=${{inputs.zone}}
    expect {
      "/ # " {}
      timeout { puts "Timed out waiting for the shell prompt"; exit 1 }
      eof { puts "Shell exited before showing a prompt"; exit 1 }
    }
    send "cat /fs-test-mount-point/$RANDOM_SEED/test.txt\n"
    expect {
      "Test text message" {}
      timeout { puts "Timed out waiting for the test file contents"; exit 1 }
      eof { puts "Shell exited before printing the test file"; exit 1 }
    }
    send "exit\n"
    EOF
    chmod +x ./create-shell.exp
    expect ./create-shell.exp
# Stops the interactive shell started by the previous step.
- name: Stop the shell
  run: >-
    python3 xpk.py shell stop
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
# Removes the generated expect script.
- name: Delete create-shell.exp file
  run: rm create-shell.exp
# Creates a workload that removes the test file from the mounted filestore.
- name: Run workload to delete file on filestore
  run: >-
    python3 xpk.py workload create
    --workload $FS_DELETE_WORKLOAD
    --command "rm -rf /fs-test-mount-point/$RANDOM_SEED/test.txt || exit 143"
    --cluster ${{inputs.cluster-name}}
    --tpu-type=${{inputs.tpu-type}}
    --zone ${{inputs.location}}
# Waits (up to the 300 passed as --timeout) for the delete workload to complete.
- name: Wait for delete workload completion and confirm it succeeded
  run: >-
    python3 xpk.py workload list
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    --wait-for-job-completion $FS_DELETE_WORKLOAD
    --timeout 300
# Detaches the storage from the cluster through xpk.
- name: Delete storage
  run: >-
    python3 xpk.py storage delete $FS_STORAGE_NAME
    --zone=${{inputs.zone}}
    --cluster=${{inputs.cluster-name}}
# Looks the VolumeBundle up by exact name instead of grepping the full list:
# a substring match could hit another storage whose name merely contains this
# one, and a failing kubectl call would produce no output and pass the check.
# --ignore-not-found makes "already gone" exit 0 with empty output, while a
# real kubectl error aborts the step.
- name: Verify VolumeBundle deleted
  run: |
    remaining=$(kubectl get volumebundle "$FS_STORAGE_NAME" --ignore-not-found -o name)
    if [ -n "$remaining" ]; then
      echo "VolumeBundle $FS_STORAGE_NAME still exists"
      exit 1
    fi
# Cleanup: runs even when earlier steps failed (if: always()).
- name: Delete the writer workload on the cluster
  if: always()
  run: >-
    python3 xpk.py workload delete
    --workload $FS_WRITE_WORKLOAD
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
@@ -124,9 +160,16 @@ jobs:
# Cleanup: runs even when earlier steps failed (if: always()).
- name: Delete the delete workload on the cluster
  if: always()
  run: >-
    python3 xpk.py workload delete
    --workload $FS_DELETE_WORKLOAD
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
127-
128- filestore-create :
163+
164+ run-filestore-create :
165+ needs : [run-filestore]
129166 runs-on : [ubuntu-22.04]
167+ env :
168+ # Names must be unique in parallel running tests.
169+ FS_STORAGE_NAME : ${{inputs.cluster-name}}-test
170+ FS_WRITE_WORKLOAD : " fs-write-workload-create"
171+ FS_READ_WORKLOAD : " fs-read-workload-create"
172+ FS_DELETE_WORKLOAD : " fs-delete-workload-create"
130173 steps :
131174 - uses : actions/checkout@v4
132175 - uses : actions/setup-python@v5
@@ -163,40 +206,83 @@ jobs:
163206 ${{env.pythonLocation}}
164207 key : xpk-deps-3.10-${{inputs.run-id}}
165208 restore-keys : xpk-deps-3.10-
# Installs the `expect` tool used later to drive the interactive xpk shell.
# `-y` pre-answers apt's confirmation prompt: the runner is non-interactive,
# so a prompt (shown when extra packages must be pulled in) would abort.
- name: Install expect package
  run: sudo apt-get install -y expect
# Confirms the xpk command is callable before the storage tests start.
- name: Verify xpk installation
  run: xpk --help
# Configures Docker authentication through gcloud; --quiet suppresses prompts.
- name: Authenticate Docker
  run: gcloud auth configure-docker --quiet
# Creates a new BASIC_HDD Filestore instance (volume vol1, size 1024) and
# mounts it at /fs-test-mount-point.
- name: Create auto-mount GCP Filestore Storage instance
  run: >-
    python3 xpk.py storage create $FS_STORAGE_NAME
    --cluster=${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    --type=gcpfilestore
    --auto-mount=true
    --vol=vol1
    --size=1024
    --tier=BASIC_HDD
    --mount-point='/fs-test-mount-point'
    --readonly=false
# Fails with exit code 143 when the storage name is missing from the listing;
# the full listing is kept in output.txt.
- name: List and verify existing Storages
  run: >-
    python3 xpk.py storage list --cluster ${{inputs.cluster-name}} --zone=${{inputs.zone}}
    | tee output.txt
    | grep $FS_STORAGE_NAME
    || (echo 'No storage found' && exit 143)
# Checks that the first container volume mount of the VolumeBundle points at
# the expected mount path.
- name: Verify VolumeBundle created
  run: >-
    kubectl get volumebundle $FS_STORAGE_NAME
    -o jsonpath='{.spec.containerVolumeMounts[0].mountPath}'
    | grep '/fs-test-mount-point'
# Creates a single-slice workload that writes test.txt into a per-run
# directory ($RANDOM_SEED) on the mounted filestore.
- name: Run workload to write file on filestore
  run: >-
    python3 xpk.py workload create
    --workload $FS_WRITE_WORKLOAD
    --num-slices=1
    --docker-image='marketplace.gcr.io/google/ubuntu2004'
    --command "mkdir /fs-test-mount-point/$RANDOM_SEED/ && echo 'Test text message' > /fs-test-mount-point/$RANDOM_SEED/test.txt || (echo 'Writing to filestore failed' && exit 143)"
    --cluster ${{inputs.cluster-name}}
    --tpu-type=${{inputs.tpu-type}}
    --zone ${{inputs.zone}}
# Waits (up to the 300 passed as --timeout) for the writer workload to complete.
- name: Wait for writer workload completion and confirm it succeeded
  run: >-
    python3 xpk.py workload list
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    --wait-for-job-completion $FS_WRITE_WORKLOAD
    --timeout 300
# Creates a single-slice workload that greps the test message back out of the
# file written by the writer workload.
- name: Run workload to read file on filestore
  run: >-
    python3 xpk.py workload create
    --workload $FS_READ_WORKLOAD
    --num-slices=1
    --command "grep 'Test text message' /fs-test-mount-point/$RANDOM_SEED/test.txt || (echo 'Reading from filestore failed' && exit 143)"
    --cluster ${{inputs.cluster-name}}
    --tpu-type=${{inputs.tpu-type}}
    --zone ${{inputs.zone}}
# Waits (up to the 300 passed as --timeout) for the reader workload to complete.
- name: Wait for reader workload completion and confirm it succeeded
  run: >-
    python3 xpk.py workload list
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    --wait-for-job-completion $FS_READ_WORKLOAD
    --timeout 300
# Writes the two-line script that the batch job will run. $RANDOM_SEED is
# expanded here on the runner, so the script contains the literal path.
- name: Create batch-read.sh script
  run: |
    {
      echo '#!/bin/bash'
      echo "grep 'Test text message' /fs-test-mount-point/$RANDOM_SEED/test.txt || (echo 'Reading from filestore failed' && exit 143)"
    } > batch-read.sh
# Submits batch-read.sh as a batch job and keeps a copy of xpk's output in
# batch-read.log so the following step can look up the job name.
- name: Run a batch-read job on the cluster
  run: |
    # tee always succeeds, so without pipefail a failing xpk call would go unnoticed.
    set -o pipefail
    python3 xpk.py batch --cluster ${{inputs.cluster-name}} --zone=${{inputs.zone}} batch-read.sh | tee batch-read.log
# Exports READ_JOB_NAME to later steps through $GITHUB_ENV.
- name: Get job name
  run: |
    # Take the text after ': ' on the first log line that mentions the job prefix.
    READ_JOB_NAME=$(awk -F': ' '/xpk-def-app-profile-slurm-/ {print $2; exit}' batch-read.log)
    # Stop here with a clear message rather than exporting an empty name.
    if [ -z "$READ_JOB_NAME" ]; then
      echo 'No job name found in batch-read.log'
      exit 1
    fi
    echo "READ_JOB_NAME=${READ_JOB_NAME}" >> "$GITHUB_ENV"
# Blocks until the batch job reports the Complete condition (1 minute limit).
- name: Wait for the batch-read job to finish
  run: >-
    kubectl wait job.batch/$READ_JOB_NAME
    --for=condition=Complete
    --timeout=1m
# Cancels the job through xpk and requires the deletion message in the output.
- name: Cancel the batch-read job
  run: >-
    python3 xpk.py job cancel $READ_JOB_NAME
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    | grep "job.batch/$READ_JOB_NAME deleted"
# Remove the temporary files created by the batch-read steps.
- name: Delete batch-read.log file
  run: rm batch-read.log
- name: Delete batch-read.sh file
  run: rm batch-read.sh
# Drives an interactive `xpk shell` session with expect: waits for the prompt,
# prints the test file from the mounted filestore, and exits.
# Each wait fails the step on timeout or early EOF. A bare `expect "pattern"`
# simply continues after a timeout, so the check would pass even if the
# prompt or the file contents never appeared.
- name: Create shell and exit it immediately
  run: |
    cat <<EOF >> create-shell.exp
    ##!/usr/bin/expect
    # Generous limit for the shell to start; TODO(review): tune to observed start-up time.
    set timeout 120
    spawn python3 xpk.py shell --cluster ${{inputs.cluster-name}} --zone=${{inputs.zone}}
    expect {
      "/ # " {}
      timeout { puts "Timed out waiting for the shell prompt"; exit 1 }
      eof { puts "Shell exited before showing a prompt"; exit 1 }
    }
    send "cat /fs-test-mount-point/$RANDOM_SEED/test.txt\n"
    expect {
      "Test text message" {}
      timeout { puts "Timed out waiting for the test file contents"; exit 1 }
      eof { puts "Shell exited before printing the test file"; exit 1 }
    }
    send "exit\n"
    EOF
    chmod +x ./create-shell.exp
    expect ./create-shell.exp
# Stops the interactive shell started by the previous step.
- name: Stop the shell
  run: >-
    python3 xpk.py shell stop
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
# Removes the generated expect script.
- name: Delete create-shell.exp file
  run: rm create-shell.exp
# Creates a single-slice workload that removes the test file from the
# mounted filestore.
- name: Run workload to delete file on filestore
  run: >-
    python3 xpk.py workload create
    --workload $FS_DELETE_WORKLOAD
    --command "rm -rf /fs-test-mount-point/$RANDOM_SEED/test.txt || exit 143"
    --num-slices=1
    --cluster ${{inputs.cluster-name}}
    --tpu-type=${{inputs.tpu-type}}
    --zone ${{inputs.zone}}
# Waits (up to the 300 passed as --timeout) for the delete workload to complete.
- name: Wait for delete workload completion and confirm it succeeded
  run: >-
    python3 xpk.py workload list
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
    --wait-for-job-completion $FS_DELETE_WORKLOAD
    --timeout 300
# Detaches the storage from the cluster through xpk.
- name: Delete storage
  run: >-
    python3 xpk.py storage delete $FS_STORAGE_NAME
    --zone=${{inputs.zone}}
    --cluster=${{inputs.cluster-name}}
# Looks the VolumeBundle up by exact name instead of grepping the full list:
# a substring match could hit another storage whose name merely contains this
# one, and a failing kubectl call would produce no output and pass the check.
# --ignore-not-found makes "already gone" exit 0 with empty output, while a
# real kubectl error aborts the step.
- name: Verify VolumeBundle deleted
  run: |
    remaining=$(kubectl get volumebundle "$FS_STORAGE_NAME" --ignore-not-found -o name)
    if [ -n "$remaining" ]; then
      echo "VolumeBundle $FS_STORAGE_NAME still exists"
      exit 1
    fi
# Cleanup: removes the Filestore instance created by this job, even when
# earlier steps failed (if: always()).
- name: Delete the filestore instance
  if: always()
  run: >-
    gcloud filestore instances delete $FS_STORAGE_NAME
    --zone=${{inputs.zone}}
    --force
# Cleanup: each of the three workloads is deleted even when earlier steps
# failed (if: always()).
- name: Delete the writer workload on the cluster
  if: always()
  run: >-
    python3 xpk.py workload delete
    --workload $FS_WRITE_WORKLOAD
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
- name: Delete the reader workload on the cluster
  if: always()
  run: >-
    python3 xpk.py workload delete
    --workload $FS_READ_WORKLOAD
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
- name: Delete the delete workload on the cluster
  if: always()
  run: >-
    python3 xpk.py workload delete
    --workload $FS_DELETE_WORKLOAD
    --cluster ${{inputs.cluster-name}}
    --zone=${{inputs.zone}}
0 commit comments