Commit 0e59c31

Merge pull request #24 from oci-hpc/dev
Pull request for marketplace release.
2 parents 8176268 + e320bc8, commit 0e59c31

76 files changed: +1208 -405 lines

autoscaling/cleanup.sh

Lines changed: 7 additions & 2 deletions
@@ -7,5 +7,10 @@ playbooks_path=$folder/../playbooks/
 inventory_path=$folder/clusters/$1
 
 ssh_options="-i ~/.ssh/id_rsa -o StrictHostKeyChecking=no"
-
-ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook $playbooks_path/destroy.yml -i $inventory_path/inventory
+if [[ "$2" == "FORCE" ]];
+then
+echo Force Deletion
+ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook $playbooks_path/destroy.yml -i $inventory_path/inventory -e "force=yes"
+else
+ANSIBLE_HOST_KEY_CHECKING=False ansible-playbook $playbooks_path/destroy.yml -i $inventory_path/inventory
+fi
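
The new FORCE argument forwards an extra force=yes variable to the destroy playbook so teardown proceeds even when hosts are unreachable. A hypothetical invocation, assuming the script is called from the autoscaling directory with a cluster name that exists under clusters/:

# cleanup of one cluster's nodes via the destroy playbook
./cleanup.sh cluster-1-hpc          # runs destroy.yml against clusters/cluster-1-hpc/inventory
./cleanup.sh cluster-1-hpc FORCE    # same, but adds -e "force=yes"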

autoscaling/configure.sh

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,8 @@ execution=1
 playbooks_path=$folder/../playbooks/
 inventory_path=$folder/clusters/$1
 
+ssh_options="-i ~/.ssh/cluster.key -o StrictHostKeyChecking=no"
+
 #
 # A little waiter function to make sure all the nodes are up before we start configure
 #
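
This hunk only defines ssh_options pointing at the per-cluster key; its consumers fall outside the excerpt. A hedged sketch of how such a variable is typically used later in the script (the node address variable is an assumption, not shown in this diff):

# hypothetical reachability check with the new per-cluster key
ssh $ssh_options opc@$node_ip exit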

autoscaling/create_cluster.sh

Lines changed: 4 additions & 1 deletion
@@ -13,6 +13,9 @@ cd $folder/clusters/$2
 if [[ $3 == VM.Standard.E3.* ]]
 then
 sed "s/##NODES##/$1/g;s/##NAME##/$2/g;s/##SHAPE##/VM.Standard.E3.Flex/g;s/##CN##/$4/g;s/##OCPU##/${3:15}/g" $folder/tf_init/variables.tf > variables.tf
+elif [[ $3 == VM.Standard.E4.* ]]
+then
+sed "s/##NODES##/$1/g;s/##NAME##/$2/g;s/##SHAPE##/VM.Standard.E4.Flex/g;s/##CN##/$4/g;s/##OCPU##/${3:15}/g" $folder/tf_init/variables.tf > variables.tf
 else
 sed "s/##NODES##/$1/g;s/##NAME##/$2/g;s/##SHAPE##/$3/g;s/##CN##/$4/g" $folder/tf_init/variables.tf > variables.tf
 fi
@@ -31,5 +34,5 @@ if [ $status -eq 0 ]
 else
 echo "Could not create $2 with $1 nodes in $runtime seconds"
 rm currently_building
-$folder/delete_cluster.sh $2
+$folder/delete_cluster.sh $2 FORCE
 fi
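
The new branch handles VM.Standard.E4.* exactly like E3: the shape written to variables.tf becomes the Flex variant, and ${3:15} takes the part of the shape argument after "VM.Standard.E4." (15 characters), i.e. the OCPU count. On failure the half-built cluster is now force-deleted. A hypothetical call, with the argument order inferred from the sed substitutions (node count, cluster name, shape, cluster-network flag):

# 4 nodes, 16 OCPUs per node on the E4 flex shape, no cluster network
./create_cluster.sh 4 cluster-2-amd416 VM.Standard.E4.16 false   # ##OCPU## becomes "16" via ${3:15}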

autoscaling/crontab/autoscale_slurm.sh

Lines changed: 6 additions & 1 deletion
@@ -57,8 +57,11 @@ try:
     shapes['BM.Standard.E2.64']='amd264'
     for i in range(1,65):
         shapes['VM.Standard.E3.'+str(i)]='amd3'+str(i)
+        shapes['VM.Standard.E4.'+str(i)]='amd4'+str(i)
     shapes['BM.Standard.E3.128']='amd3128'
+    shapes['BM.Standard.E3.128']='amd4128'
     shapes['BM.HPC2.36']='hpc'
+    shapes['BM.Optimized3.36']='hpc2'
 
     def getstatus_slurm():
         cluster_to_build=[]
@@ -71,7 +74,7 @@ try:
             if feature.startswith('VM') or feature.startswith('BM'):
                 shape=feature
                 break
-        if shape == "BM.HPC2.36" or shape == "BM.GPU4.8":
+        if shape == "BM.HPC2.36" or shape == "BM.GPU4.8" or shape == "BM.Optimized3.36":
             CN = "true"
         else:
             CN = "false"
@@ -135,6 +138,8 @@ try:
     for shape in shapes.keys():
         available_names[shapes[shape]] = ["cluster-"+str(i) for i in range(1,cluster_names_number+1)]
     available_names['hpc']=["cluster-"+str(i) for i in range(1,hpc_cluster_names_number+1)]
+    available_names['hpc2']=["cluster-"+str(i) for i in range(1,hpc_cluster_names_number+1)]
+
     for clusterName in os.listdir(clusters_path):
         clusterType=clusterName.split('-')[-1]
         clusterNumber='-'.join(clusterName.split('-')[:2])
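
The autoscaler reads the requested shape from a Slurm feature beginning with VM or BM, so these additions let jobs target the E4 flex shapes and BM.Optimized3.36 (the latter gets a cluster network like BM.HPC2.36). Note that the second bare-metal addition reassigns the existing 'BM.Standard.E3.128' key to 'amd4128' rather than introducing an E4 key. Hedged submission examples, assuming jobs request shapes through sbatch constraints:

# hypothetical jobs targeting the newly recognized shapes
sbatch --constraint=VM.Standard.E4.8 --nodes=2 job.sh    # maps to cluster type "amd48", CN="false"
sbatch --constraint=BM.Optimized3.36 --nodes=4 job.sh    # treated like BM.HPC2.36: CN="true", cluster type "hpc2"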

autoscaling/delete_cluster.sh

Lines changed: 18 additions & 9 deletions
@@ -15,17 +15,26 @@ then
 echo "The cluster is already being destroyed"
 else
 echo $1 >> currently_destroying
-terraform destroy -auto-approve >> $folder/logs/delete_$1_${date}.log 2>&1
-status=$?
-end=`date +%s`
-runtime=$((end-start))
-if [ $status -eq 0 ]
+$folder/cleanup.sh $1 >> $folder/logs/delete_$1_${date}.log 2>&1
+status_initial_deletion=$?
+if [ $status_initial_deletion -eq 0 ] || [[ $2 == FORCE ]]
 then
-echo "Successfully deleted cluster $1 in $runtime seconds"
-cd
-rm -rf $folder/clusters/$1
+$folder/cleanup.sh $1 FORCE >> $folder/logs/delete_$1_${date}.log 2>&1
+terraform destroy -auto-approve >> $folder/logs/delete_$1_${date}.log 2>&1
+status=$?
+end=`date +%s`
+runtime=$((end-start))
+if [ $status -eq 0 ]
+then
+echo "Successfully deleted cluster $1 in $runtime seconds"
+cd
+rm -rf $folder/clusters/$1
+else
+echo "Could not delete cluster $1 (Time: $runtime seconds)"
+rm currently_destroying
+fi
 else
-echo "Could not delete cluster $1 (Time: $runtime seconds)"
+echo "Could not delete cluster $1 (Use FORCE to delete anyway)"
 rm currently_destroying
 fi
 fi
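
The deletion flow now runs cleanup.sh first and only proceeds to terraform destroy if that succeeds or if FORCE is given, in which case cleanup is repeated with the force flag before destroying. A hypothetical invocation from the autoscaling directory:

# ordinary delete vs. forced delete
./delete_cluster.sh cluster-1-hpc          # aborts with "Use FORCE to delete anyway" if cleanup.sh fails
./delete_cluster.sh cluster-1-hpc FORCE    # re-runs cleanup.sh with FORCE, then terraform destroy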

autoscaling/tf_init/bastion_update.tf

Lines changed: 0 additions & 14 deletions
@@ -55,17 +55,3 @@ resource "null_resource" "configure" {
     command = "timeout 30m ${var.scripts_folder}/configure.sh ${local.cluster_name}"
   }
 }
-
-resource "null_resource" "on_destroy" {
-  depends_on = [local_file.inventory]
-  triggers = {
-    scripts_folder = var.scripts_folder
-    cluster_name = local.cluster_name
-    bastion_path = local.bastion_path
-  }
-  provisioner "local-exec" {
-    command = "${self.triggers.scripts_folder}/cleanup.sh ${self.triggers.cluster_name}"
-    when = destroy
-    on_failure = fail
-  }
-}

autoscaling/tf_init/cluster-network-configuration.tf

Lines changed: 3 additions & 0 deletions
@@ -17,6 +17,9 @@ resource "oci_core_instance_configuration" "cluster-network-instance_configurati
       ssh_authorized_keys = file("/home/opc/.ssh/id_rsa.pub")
       user_data = base64encode(data.template_file.config.rendered)
     }
+    agent_config {
+      is_management_disabled = true
+    }
     shape = var.cluster_network_shape
     source_details {
       source_type = "image"

autoscaling/tf_init/instance-pool-configuration.tf

Lines changed: 3 additions & 0 deletions
@@ -17,6 +17,9 @@ resource "oci_core_instance_configuration" "instance_pool_configuration" {
       ssh_authorized_keys = file("/home/opc/.ssh/id_rsa.pub")
       user_data = base64encode(data.template_file.config.rendered)
     }
+    agent_config {
+      is_management_disabled = true
+    }
     shape = var.instance_pool_shape
 
     dynamic "shape_config" {

autoscaling/tf_init/inventory.tpl

Lines changed: 0 additions & 2 deletions
@@ -5,9 +5,7 @@ ${bastion_name} ansible_host=${bastion_ip} ansible_user=opc role=bastion
 ${host} ansible_host=${ip} ansible_user=opc role=compute
 %{ endfor ~}
 [nfs]
-%{ if scratch_nfs ~}
 ${nfs}
-%{ endif ~}
 [all:children]
 bastion
 compute

autoscaling/tf_init/locals.tf

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ locals {
   // image = (var.cluster_network && var.use_marketplace_image == true) || (var.cluster_network == false && var.use_marketplace_image == false) ? var.image : data.oci_core_images.linux.images.0.id
 
   // is_bastion_flex_shape = var.bastion_shape == "VM.Standard.E3.Flex" ? [var.bastion_ocpus]:[]
-  is_instance_pool_flex_shape = var.instance_pool_shape == "VM.Standard.E3.Flex" ? [var.instance_pool_ocpus]:[]
-
+  is_instance_pool_flex_shape = length(regexall(".*VM.*E[3-4].*Flex$", var.instance_pool_shape)) > 0 ? [var.instance_pool_ocpus]:[]
+
   // bastion_mount_ip = var.bastion_block ? element(concat(oci_core_volume_attachment.bastion_volume_attachment.*.ipv4, [""]), 0) : "none"
 
   scratch_nfs_type = var.cluster_network ? var.scratch_nfs_type_cluster : var.scratch_nfs_type_pool
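
The flex-shape test is now a pattern instead of an exact E3 string, so both VM.Standard.E3.Flex and VM.Standard.E4.Flex pass OCPUs through shape_config. The check below is only an approximation of Terraform's regexall using grep -E, to show which shape names the new pattern accepts:

# illustrative only; the sample shape names are hypothetical
for shape in VM.Standard.E3.Flex VM.Standard.E4.Flex VM.Standard2.24 BM.Standard.E4.128; do
  if echo "$shape" | grep -Eq '.*VM.*E[3-4].*Flex$'; then echo "$shape: flex (ocpus set)"; else echo "$shape: fixed"; fi
done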
