diff --git a/tools/cloud-build/daily-tests/builds/cloud-batch.yaml b/tools/cloud-build/daily-tests/builds/cloud-batch.yaml index da50e6f8f0..a82d813526 100644 --- a/tools/cloud-build/daily-tests/builds/cloud-batch.yaml +++ b/tools/cloud-build/daily-tests/builds/cloud-batch.yaml @@ -55,6 +55,7 @@ steps: BLUEPRINT=examples/serverless-batch.yaml bash tools/add_ttl_label.sh $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ --extra-vars="@tools/cloud-build/daily-tests/tests/cloud-batch.yml" diff --git a/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml index 520490c52e..6de96c0789 100644 --- a/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml @@ -97,6 +97,7 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" cat $${EXAMPLE_BP} + python3 tools/modify_vpc.py "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/gke-h4d.yaml b/tools/cloud-build/daily-tests/builds/gke-h4d.yaml index 15d564d283..8a9ac8a643 100644 --- a/tools/cloud-build/daily-tests/builds/gke-h4d.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-h4d.yaml @@ -79,6 +79,7 @@ steps: echo ' outputs: [instructions]' >> $${EXAMPLE_BP} bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" + python3 tools/modify_vpc.py "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ --extra-vars="@tools/cloud-build/daily-tests/tests/gke-h4d.yml" diff 
--git a/tools/cloud-build/daily-tests/builds/h4d-vm.yaml b/tools/cloud-build/daily-tests/builds/h4d-vm.yaml index 41adb41ff9..17f23f4d1b 100644 --- a/tools/cloud-build/daily-tests/builds/h4d-vm.yaml +++ b/tools/cloud-build/daily-tests/builds/h4d-vm.yaml @@ -67,6 +67,7 @@ steps: REGION="$${ZONE%-*}" BUILD_ID_SHORT=$${BUILD_ID:0:6} BLUEPRINT="/workspace/examples/h4d-vm.yaml" + python3 tools/modify_vpc.py "$${BLUEPRINT}" sed -i -e '/deletion_protection:/{n;s/enabled: true/enabled: false/}' $${BLUEPRINT} sed -i -e '/reason:/d' $${BLUEPRINT} sed -i '/ - id: h4d-vms/,/ - id: wait-for-vms/ { / settings:/a \ @@ -83,7 +84,6 @@ steps: echo "INFO: Using $${H4D_VARS_FILE} as it is for SPOT provisioning." fi bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: cluster-net-0/,/- id: cluster-rdma-net-0/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/hcls.yaml b/tools/cloud-build/daily-tests/builds/hcls.yaml index 6e1e51cb1c..6d32d51dd8 100644 --- a/tools/cloud-build/daily-tests/builds/hcls.yaml +++ b/tools/cloud-build/daily-tests/builds/hcls.yaml @@ -68,6 +68,7 @@ steps: BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} BLUEPRINT="examples/hcls-blueprint.yaml" bash tools/add_ttl_label.sh $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ --extra-vars="@tools/cloud-build/daily-tests/tests/hcls.yml" diff --git a/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml b/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml index 3fefb8ae6e..98d30937ca 100644 --- 
a/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml @@ -60,6 +60,7 @@ steps: BLUEPRINT="examples/hpc-enterprise-slurm.yaml" bash tools/add_ttl_label.sh $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ --extra-vars="@tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm.yml" diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml index f9c3e90b86..5cb32226af 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml @@ -86,7 +86,7 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: sysnet/,/- id: gpunets/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} "\ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml index 74e917d617..52c1597607 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml @@ -64,7 +64,7 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: sysnet/,/- id: gpunets/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ 
--extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} "\ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml index dd42a42c58..c2fb9c2f2f 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml @@ -88,6 +88,7 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml index 912c2f9918..5b929b6a6c 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml @@ -66,6 +66,7 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml index cd83af290f..de2d50a262 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml @@ -26,7 +26,6 @@ tags: - m.vpc - slurm6 - timeout: 14400s # 4hr steps: # While using static network names we are gaurding against more than 1 instance running at a time (for multi-group tests) @@ -77,7 +76,7 @@ steps: bash 
tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: a3ultra-slurm-net-0/,/- id: a3ultra-slurm-net-1/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml index 678b9eef7a..b010a0d200 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml @@ -59,7 +59,7 @@ steps: sed -i -e '/deletion_protection:/{n;s/enabled: true/enabled: false/}' $${BLUEPRINT} sed -i -e '/reason:/d' $${BLUEPRINT} bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: a3ultra-net-0/,/- id: a3ultra-net-1/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml index 7fb13d7b4c..7de8e64cef 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml @@ -84,7 +84,7 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: a3ultra-net-0/,/- id: a3ultra-net-1/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ 
--user=sa_106486320838376751393 \ --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml index da1706136e..c8ac4bbce8 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml @@ -90,7 +90,7 @@ steps: echo "INFO: Using $${SLURM_VARS_FILE} as it is for SPOT provisioning." fi bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: a3ultra-slurm-net-0/,/- id: a3ultra-slurm-net-1/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml index 10d3ca3118..65814af7a0 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml @@ -64,7 +64,7 @@ steps: sed -i -e '/deletion_protection:/{n;s/enabled: true/enabled: false/}' $${BLUEPRINT} sed -i -e '/reason:/d' $${BLUEPRINT} bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: a3ultra-slurm-net-0/,/- id: a3ultra-slurm-net-1/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml 
b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml index 87397aa5b3..369449a32d 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml @@ -26,7 +26,6 @@ tags: - m.vpc - slurm6 - timeout: 14400s # 4hr steps: # While using static network names we are gaurding against more than 1 instance running at a time (for multi-group tests) @@ -76,7 +75,7 @@ steps: echo "INFO: Using $${VARS_FILE} as it is for SPOT provisioning." fi - sed -i -e '/- id: a4high-slurm-net-0/,/- id: a4high-slurm-net-1/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml index 807a6d9dc9..ca075ea1b6 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml @@ -89,7 +89,7 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: a4high-slurm-net-0/,/- id: a4high-slurm-net-1/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml index 6036182df0..38119aa0e2 100644 --- a/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml +++ 
b/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml @@ -82,7 +82,7 @@ steps: fi bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: net0/,/- id: homefs/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml index f3f010e2e7..346a467f57 100644 --- a/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml @@ -81,7 +81,7 @@ steps: fi bash tools/add_ttl_label.sh $${BLUEPRINT} - sed -i -e '/- id: h4d-slurm-net-0/,/- id: h4d-rdma-net/ s/network_name: .*/network_name: $(vars.base_network_name)/' $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml index 970f633b01..a38b4cc999 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml @@ -57,6 +57,7 @@ steps: BLUEPRINT="examples/hpc-slurm.yaml" bash tools/add_ttl_label.sh $${BLUEPRINT} + python3 tools/modify_vpc.py "$${BLUEPRINT}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-rocky8.yml" diff --git 
a/tools/cloud-build/daily-tests/tests/h4d-vm.yml b/tools/cloud-build/daily-tests/tests/h4d-vm.yml index b31b181e28..1cac5a48f5 100644 --- a/tools/cloud-build/daily-tests/tests/h4d-vm.yml +++ b/tools/cloud-build/daily-tests/tests/h4d-vm.yml @@ -19,7 +19,7 @@ test_name: h4d-jbvms deployment_name: h4d-jbvms-{{ build }} workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/h4d-vm.yaml" -network: "{{ test_name }}" +network: "{{ test_name }}-net" remote_node: "{{ deployment_name }}-0" post_deploy_tests: - test-validation/test-irdma.yml diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-onspot-slurm.yml b/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-onspot-slurm.yml index dacac701e3..3a8dfc1c12 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-onspot-slurm.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-onspot-slurm.yml @@ -23,7 +23,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-highgpu-8g/a3high-slurm-blueprint.yaml" login_node: "{{ slurm_cluster_name }}-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ deployment_name }}-net-0" nccl_test_path: "examples/machine-learning/a3-highgpu-8g/nccl-tests" sub_network: "{{ deployment_name }}-sub-0" post_deploy_tests: @@ -46,7 +46,6 @@ custom_vars: a3high_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" tcpx_kernel_login: "{{ tcpx_kernel_login }}" diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm.yml b/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm.yml index cf5305ba42..5aea242458 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm.yml @@ -23,7 +23,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-highgpu-8g/a3high-slurm-blueprint.yaml" login_node: "{{ 
slurm_cluster_name }}-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ deployment_name }}-net-0" nccl_test_path: "examples/machine-learning/a3-highgpu-8g/nccl-tests" sub_network: "{{ deployment_name }}-sub-0" post_deploy_tests: @@ -43,7 +43,6 @@ custom_vars: mounts: - /home cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" tcpx_kernel_login: "{{ tcpx_kernel_login }}" diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-custom-blueprint-test.yml b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-custom-blueprint-test.yml index d08c9de2c8..50a1618073 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-custom-blueprint-test.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-custom-blueprint-test.yml @@ -22,7 +22,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/tools/cloud-build/daily-tests/blueprints/a3ultra-custom-image-blueprint.yaml" login_node: "{{ slurm_cluster_name }}-slurm-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ test_name }}-net-0" nccl_test_path: "examples/machine-learning/a3-ultragpu-8g/nccl-tests" post_deploy_tests: - test-validation/test-mounts.yml @@ -46,7 +46,6 @@ custom_vars: a3ultra_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" slurm_cluster_name: "{{ slurm_cluster_name }}" @@ -54,4 +53,5 @@ cli_deployment_vars: a3u_cluster_size: 2 instance_image_project: "{{ instance_image_project }}" instance_image_family: "{{ instance_image_family }}" + base_network_name: 0vpc-$(build) a3u_enable_spot_vm: true diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-jbvms.yml b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-jbvms.yml index be70e6b4a8..4a8a23730b 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-jbvms.yml +++ 
b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-jbvms.yml @@ -22,7 +22,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml" region: europe-west1 zone: europe-west1-b -network: "{{ test_name }}" +network: "{{ test_name }}-net-0" remote_node: "{{ hostname_prefix }}-0" post_deploy_tests: - test-validation/test-mounts.yml @@ -31,8 +31,8 @@ custom_vars: mounts: - /home cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" a3u_reservation_name: hpc-exr-2 a3u_provisioning_model: RESERVATION_BOUND + base_network_name: 0vpc-$(build) diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-jbvms.yml b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-jbvms.yml index bcf8cc3e13..e8bdf1e586 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-jbvms.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-jbvms.yml @@ -20,7 +20,7 @@ deployment_name: a3u-spot-jbvms-{{ build }} hostname_prefix: "{{ deployment_name }}-beowulf" workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml" -network: "{{ test_name }}" +network: "{{ test_name }}-net-0" remote_node: "{{ hostname_prefix }}-0" post_deploy_tests: - test-validation/test-mounts.yml @@ -32,7 +32,7 @@ custom_vars: a3ultra_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" a3u_provisioning_model: SPOT + base_network_name: 0vpc-$(build) diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-slurm.yml b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-slurm.yml index a83fcf9d3e..ce649b9ffe 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-slurm.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-onspot-slurm.yml @@ -22,7 +22,7 @@ workspace: /workspace blueprint_yaml: "{{ 
workspace }}/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml" login_node: "{{ slurm_cluster_name }}-slurm-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ test_name }}-net-0" nccl_test_path: "examples/machine-learning/a3-ultragpu-8g/nccl-tests" post_deploy_tests: - test-validation/test-mounts.yml @@ -46,10 +46,10 @@ custom_vars: a3ultra_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" slurm_cluster_name: "{{ slurm_cluster_name }}" disk_size_gb: 100 a3u_cluster_size: 2 + base_network_name: 0vpc-$(build) a3u_enable_spot_vm: true diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-slurm.yml b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-slurm.yml index f5943730fd..c1a75122c6 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-slurm.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-slurm.yml @@ -24,7 +24,7 @@ login_node: "{{ slurm_cluster_name }}-slurm-login-*" controller_node: "{{ slurm_cluster_name }}-controller" region: europe-west1 zone: europe-west1-b -network: "{{ test_name }}" +network: "{{ test_name }}-net-0" post_deploy_tests: - test-validation/test-mounts.yml - test-validation/test-partitions.yml @@ -43,10 +43,10 @@ custom_vars: - /home - /gcs cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" slurm_cluster_name: "{{ slurm_cluster_name }}" disk_size_gb: 100 a3u_cluster_size: 2 a3u_reservation_name: hpc-exr-2 + base_network_name: 0vpc-$(build) diff --git a/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-custom-blueprint-test.yml b/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-custom-blueprint-test.yml index 4edd939e8f..0f86a4597c 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-custom-blueprint-test.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-custom-blueprint-test.yml @@ 
-22,7 +22,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/tools/cloud-build/daily-tests/blueprints/a4high-custom-image-blueprint.yaml" login_node: "{{ slurm_cluster_name }}-slurm-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ test_name }}-net-0" post_deploy_tests: - test-validation/test-mounts.yml - test-validation/test-partitions.yml @@ -44,11 +44,11 @@ custom_vars: a4high_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" slurm_cluster_name: "{{ slurm_cluster_name }}" disk_size_gb: 100 + base_network_name: 0vpc-$(build) a4h_enable_spot_vm: true a4h_cluster_size: 2 diff --git a/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-onspot-slurm.yml b/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-onspot-slurm.yml index 447de292b2..993a1777b9 100644 --- a/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-onspot-slurm.yml +++ b/tools/cloud-build/daily-tests/tests/ml-a4-highgpu-onspot-slurm.yml @@ -22,7 +22,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/machine-learning/a4-highgpu-8g/a4high-slurm-blueprint.yaml" login_node: "{{ slurm_cluster_name }}-slurm-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ test_name }}-net-0" post_deploy_tests: - test-validation/test-mounts.yml - test-validation/test-partitions.yml @@ -44,10 +44,10 @@ custom_vars: a4high_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" slurm_cluster_name: "{{ slurm_cluster_name }}" disk_size_gb: 100 a4h_cluster_size: 2 + base_network_name: 0vpc-$(build) a4h_enable_spot_vm: true diff --git a/tools/cloud-build/daily-tests/tests/ml-g4-onspot-slurm.yml b/tools/cloud-build/daily-tests/tests/ml-g4-onspot-slurm.yml index 9feb8e7ed3..48ebe17fd6 100644 --- 
a/tools/cloud-build/daily-tests/tests/ml-g4-onspot-slurm.yml +++ b/tools/cloud-build/daily-tests/tests/ml-g4-onspot-slurm.yml @@ -22,7 +22,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/ml-slurm-g4.yaml" login_node: "{{ slurm_cluster_name }}-slurm-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ deployment_name }}-net-0" post_deploy_tests: - test-validation/test-mounts.yml - test-validation/test-partitions.yml @@ -41,7 +41,6 @@ custom_vars: g4_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' region: "{{ region }}" zone: "{{ zone }}" slurm_cluster_name: "{{ slurm_cluster_name }}" diff --git a/tools/cloud-build/daily-tests/tests/ml-h4d-onspot-slurm.yml b/tools/cloud-build/daily-tests/tests/ml-h4d-onspot-slurm.yml index 014624d36f..85e9ba1a7e 100644 --- a/tools/cloud-build/daily-tests/tests/ml-h4d-onspot-slurm.yml +++ b/tools/cloud-build/daily-tests/tests/ml-h4d-onspot-slurm.yml @@ -22,7 +22,7 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/hpc-slurm-h4d/hpc-slurm-h4d.yaml" login_node: "{{ slurm_cluster_name }}-slurm-login-*" controller_node: "{{ slurm_cluster_name }}-controller" -network: "{{ test_name }}" +network: "{{ deployment_name }}-net" post_deploy_tests: - test-validation/test-mounts.yml - test-validation/test-partitions.yml @@ -39,7 +39,6 @@ custom_vars: h4d_onspot: true enable_spot: true cli_deployment_vars: - base_network_name: '{{ test_name }}' deployment_name: "{{ deployment_name }}" region: "{{ region }}" zone: "{{ zone }}"
diff --git a/tools/modify_vpc.py b/tools/modify_vpc.py
new file mode 100644
index 0000000000..d7f47906b4
--- /dev/null
+++ b/tools/modify_vpc.py
@@ -0,0 +1,65 @@
+"""Assign consecutive network names to the VPC modules of a blueprint.
+
+Usage: python3 tools/modify_vpc.py <blueprint_path>
+"""
+
+import sys
+
+import yaml
+
+# Substring of a module's `source` that identifies a VPC module.
+VPC_SOURCE_MARKER = "modules/network/vpc"
+
+
+def modify_vpcs(blueprint_path):
+    """Rename the network of every VPC module in a blueprint file, in place.
+
+    VPC modules are numbered in document order across all deployment groups and
+    named `$(vars.test_name)-0`, `$(vars.test_name)-1`, ... One line is printed
+    per updated module.
+
+    NOTE(review): the blueprint must be able to resolve `$(vars.test_name)`;
+    the sed commands this script replaces used `$(vars.base_network_name)`.
+    Confirm that every caller supplies a `test_name` deployment variable.
+
+    Args:
+        blueprint_path: Path of the blueprint YAML file to modify.
+
+    Returns:
+        The number of VPC modules that were renamed.
+    """
+    with open(blueprint_path, "r", encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+
+    # An empty file loads as None and a group may carry `modules: null`, so
+    # fall back to empty sequences rather than failing on them.
+    groups = data.get("deployment_groups") if isinstance(data, dict) else None
+
+    vpc_count = 0
+    for group in groups or []:
+        for module in group.get("modules") or []:
+            if VPC_SOURCE_MARKER not in (module.get("source") or ""):
+                continue
+            settings = module.get("settings")
+            if settings is None:
+                # Covers a missing key as well as an explicit `settings: null`.
+                settings = module["settings"] = {}
+            network_name = f"$(vars.test_name)-{vpc_count}"
+            settings["network_name"] = network_name
+            print(f"Updated module '{module.get('id')}' to: {network_name}")
+            vpc_count += 1
+
+    # The safe_load/dump round trip drops comments and reformats the file, so
+    # only rewrite it when something changed. This also keeps an empty
+    # blueprint from being overwritten with a literal `null` document.
+    if vpc_count:
+        with open(blueprint_path, "w", encoding="utf-8") as f:
+            yaml.dump(data, f, sort_keys=False)
+    return vpc_count
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <blueprint_path>", file=sys.stderr)
+        sys.exit(1)
+    modify_vpcs(sys.argv[1])