3030 # Cluster env settings
3131 # net0 and filestore ranges must not overlap
3232 net0_range : 192.168.0.0/19
33- filestore_ip_range : 192.168.32.0/24
3433 net1_range : 192.168.64.0/18
3534 rdma_net_range : 192.168.128.0/18
3635 # Cluster Settings
@@ -49,21 +48,15 @@ vars:
4948 a4h_dws_flex_enabled : false
5049 a4h_enable_spot_vm : false
5150
52- # To enable Managed-Lustre please uncomment this section and fill out the settings.
53- # Additionally, please uncomment the private_service_access and managed-lustre modules.
54- # Managed Lustre is only supported in specific regions and zones
55- # Please refer https://cloud.google.com/managed-lustre/docs/locations
56-
5751 # Managed-Lustre instance name. This should be unique for each deployment.
58- # lustre_instance_id: lustre-instance
52+ lustre_instance_id : lustre-instance
5953
6054 # The values of size_gib and per_unit_storage_throughput are correlated
6155 # Please refer to https://cloud.google.com/managed-lustre/docs/create-instance#performance-tiers
6256 # Storage capacity of the lustre instance in GiB
63- # lustre_size_gib: 36000
64-
57+ lustre_size_gib : 36000
6558 # Maximum throughput of the lustre instance in MBps per TiB
66- # per_unit_storage_throughput: 500
59+ per_unit_storage_throughput : 500
6760
6861deployment_groups :
6962- group : image-env
@@ -127,7 +120,7 @@ deployment_groups:
127120 "install_cuda": false,
128121 "install_gcsfuse": true,
129122 "install_lustre": false,
130- "install_managed_lustre": false ,
123+ "install_managed_lustre": true ,
131124 "install_nvidia_repo": true,
132125 "install_ompi": true,
133126 "allow_kernel_upgrades": false,
@@ -329,43 +322,26 @@ deployment_groups:
329322 ip_range : $(vars.rdma_net_range)
330323 region : $(vars.region)
331324
325+ # To use Managed Lustre for the shared /home directory:
326+ # 1. Comment out the filestore block above and the `filestore_ip_range` line in the vars block.
327+ - id : private_service_access
328+ source : modules/network/private-service-access
329+ use : [a4high-slurm-net-0]
330+
332331 - id : homefs
333- source : modules/file-system/filestore
332+ source : modules/file-system/managed-lustre
334333 use :
335334 - a4high-slurm-net-0
335+ - private_service_access
336336 settings :
337- filestore_tier : HIGH_SCALE_SSD
338- size_gb : 10240
337+ size_gib : $(vars.lustre_size_gib)
338+ name : $(vars.lustre_instance_id)
339339 local_mount : /home
340- reserved_ip_range : $(vars.filestore_ip_range)
341- deletion_protection :
342- enabled : true
343- reason : Avoid data loss
340+ remote_mount : lustrefs
341+ per_unit_storage_throughput : $(vars.per_unit_storage_throughput)
344342 outputs :
345343 - network_storage
346344
347- # To use Managed Lustre as for the shared /home directory:
348- # 1. Comment out the filestore block above and the`filestore_ip_range` line in the vars block.
349- # 2. Uncomment the managed-lustre and private-service-access blocks
350- # 3. Change the value for "install_managed_lustre" in /var/tmp/slurm_vars.json above to true
351- # - id: private_service_access
352- # source: modules/network/private-service-access
353- # use: [a4high-slurm-net-0]
354-
355- # - id: homefs
356- # source: modules/file-system/managed-lustre
357- # use:
358- # - a4high-slurm-net-0
359- # - private_service_access
360- # settings:
361- # size_gib: $(vars.lustre_size_gib)
362- # name: $(vars.lustre_instance_id)
363- # local_mount: /home
364- # remote_mount: lustrefs
365- # per_unit_storage_throughput: $(vars.per_unit_storage_throughput)
366- # outputs:
367- # - network_storage
368-
369345 # The following four modules create and mount a Cloud Storage Bucket with
370346 # gcsfuse. They are optional but recommended for many use cases.
371347 # (Optional) The following creates a GCS bucket that will be mounted
0 commit comments