@@ -30,6 +30,7 @@ gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/g
3030gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step40000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step40000
3131gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step50000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step50000
3232gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step60000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step60000
33+ gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step70000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step70000
3334
3435
3536# in-progress
@@ -39,7 +40,6 @@ gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/g
3940# todo:
4041
4142
42- gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step70000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step70000
4343gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step80000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step80000
4444gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step90000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step90000
4545gsutil rsync -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step100000 gs://bigscience-backups/tr11-176B-ml/checkpoints/global_step100000
@@ -79,20 +79,18 @@ gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkp
7979gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step51000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step51000
8080gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step54000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step54000
8181gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step57000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step57000
82+ gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step63000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step63000
83+ gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step66000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step66000
84+ gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step69000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step69000
8285
8386
8487# in-progress
8588
86-
87-
89+ gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step72000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step72000
8890
8991
9092# todo:
9193
92- gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step63000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step63000
93- gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step66000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step66000
94- gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step69000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step69000
95- gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step72000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step72000
9694gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step75000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step75000
9795gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step78000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step78000
9896gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSCRATCH/checkpoints/tr11-176B-ml/checkpoints/main/global_step81000 gs://bigscience-backups/tr11-176B-ml/checkpoints-weights/global_step81000
@@ -133,3 +131,15 @@ gsutil rsync -x "bf16.*" -r $six_ALL_CCFRSTORE/checkpoints/tr11-176B-ml/checkpoi
133131# todo:
134132
135133```
134+
135+
136+ ## Tarring the checkpoints in STORE
137+
138+ Since the number of inodes available to us in STORE is limited, we should tar the checkpoints (one tar file per checkpoint directory):
139+
140+ ```
141+ cd /gpfsdsstore/projects/rech/six/commun/checkpoints/tr11-176B-ml/checkpoints
142+ cd 1
143+ find * -maxdepth 0 -type d -exec tar cvf {}.tar {} \;
144+
145+ ```
0 commit comments