Skip to content

Commit 17da7d4

Browse files
Add flag to remove unreachable nodes when reconfiguring
1 parent b5ba793 commit 17da7d4

File tree

2 files changed

+59
-2
lines changed

2 files changed

+59
-2
lines changed

bin/find_reachable_hosts.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
#
# Probe every host listed in an input file over SSH and record the ones that
# answer, so that unreachable nodes can be left out before we start configure.
#
# Usage: find_reachable_hosts.sh <hosts_file> <reachable_file>
#   hosts_file      whitespace-separated host names / IPs to probe
#   reachable_file  rewritten on every run; one reachable host per line
#

if [[ $# -ne 2 ]]; then
  echo "Usage: $0 <hosts_file> <reachable_file>" >&2
  exit 1
fi

hosts_file=$1
reachable_file=$2

echo "Checking For SSH"

# Keep the options in an array so each one stays a separate word without
# relying on unquoted word splitting. The tilde is quoted on purpose: it is
# passed through literally and expanded by ssh itself, as before.
ssh_options=(-i "~/.ssh/cluster.key" -o StrictHostKeyChecking=no -o ConnectTimeout=15)

# Truncate instead of rm: this neither fails when the file does not exist yet
# nor leaves the caller without a result file when no host is reachable.
: > "${reachable_file}"

for host in $(cat "${hosts_file}") ; do
  echo "validating connection to: ${host}"
  # A host counts as reachable when `uptime` ran and printed its "load" line.
  if ssh "${ssh_options[@]}" "opc@${host}" uptime | grep -q load ; then
    echo "${host}" >> "${reachable_file}"
  fi
done

bin/resize.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,11 @@ def reconfigure(comp_ocid,cn_ocid,inventory,CN, crucial=False):
188188
host_to_wait_for=[]
189189
inventory_dict['compute_configured']=[]
190190
inventory_dict['compute_to_add']=[]
191-
for node in instances:
191+
if remove_unreachable:
192+
reachable_instances= getreachable(instances)
193+
else:
194+
reachable_instances=instances
195+
for node in reachable_instances:
192196
name=node['display_name']
193197
ip=node['ip']
194198
nodeline=name+" ansible_host="+ip+" ansible_user=opc role=compute\n"
@@ -215,7 +219,35 @@ def reconfigure(comp_ocid,cn_ocid,inventory,CN, crucial=False):
215219
os.system('sudo mv '+tmp_inventory_reconfig+' '+inventory)
216220
else:
217221
print("The reconfiguration had an error")
218-
print("Try rerunning this command: ansible-playbook -i "+tmp_inventory_reconfig+' '+inventory )
222+
print("Try rerunning this command: ansible-playbook -i "+tmp_inventory_reconfig+' '+playbook )
223+
224+
def getreachable(instances):
    """Return the nodes of ``instances`` that answer over SSH.

    The IP of every node is written to ``/tmp/input_hosts_to_check``; the
    helper script ``/opt/oci-hpc/bin/find_reachable_hosts.sh`` probes each
    address and lists the ones that responded in ``/tmp/reachable_hosts``.
    The nodes whose IP appears in that list are returned.

    Args:
        instances: iterable of node dicts; every dict must provide an
            ``'ip'`` key with the address to probe.

    Returns:
        list: the reachable node dicts, in the order they appear in
        ``instances``, each at most once. Empty when ``instances`` is
        empty or when no host answered.

    Raises:
        OSError: if the helper script cannot be started or the input file
            cannot be written.
    """
    script = "/opt/oci-hpc/bin/find_reachable_hosts.sh"
    hosts_path = "/tmp/input_hosts_to_check"
    reachable_path = "/tmp/reachable_hosts"

    # Materialise once so generators are accepted and can be iterated twice.
    instances = list(instances)
    if not instances:
        # Nothing to probe: do not spawn the helper at all.
        return []

    with open(hosts_path, "w") as hosts_file:
        hosts_file.writelines(node["ip"] + "\n" for node in instances)

    print(script + " " + hosts_path + " " + reachable_path)
    my_env = os.environ.copy()
    my_env["ANSIBLE_HOST_KEY_CHECKING"] = "False"

    # stderr is merged into stdout: a separate stderr pipe that is never read
    # can fill up and block the child (ssh emits warnings for every host).
    with subprocess.Popen(
        [script, hosts_path, reachable_path],
        env=my_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as proc:
        # Relay the helper's progress messages as they arrive.
        for raw_line in proc.stdout:
            print(raw_line.decode(errors="replace").strip())

    try:
        with open(reachable_path, "r") as reachable_file:
            reachable_ips = {line.strip() for line in reachable_file}
    except FileNotFoundError:
        # The helper wrote no result file: treat every host as unreachable.
        reachable_ips = set()

    # Set membership keeps this linear in the number of instances.
    return [node for node in instances if node["ip"] in reachable_ips]
219251

220252
def update_cluster(inventory,playbook,hostfile=None):
221253
my_env = os.environ.copy()
@@ -331,6 +363,7 @@ def updateTFState(inventory,cluster_name,size):
331363
parser.add_argument('--user_logging', help='If present. Use the default settings in ~/.oci/config to connect to the API. Default is using instance_principal',action='store_true',default=False)
332364
parser.add_argument('--force', help='If present. Nodes will be removed even if the destroy playbook failed',action='store_true',default=False)
333365
parser.add_argument('--ansible_crucial', help='If present during reconfiguration, only crucial ansible playbooks will be executed on the live nodes. Non live nodes will be removed',action='store_true',default=False)
366+
parser.add_argument('--remove_unreachable', help='If present, nodes that are not sshable will be removed from the config',action='store_true',default=False)
334367

335368
args = parser.parse_args()
336369

@@ -384,6 +417,11 @@ def updateTFState(inventory,cluster_name,size):
384417
else:
385418
ansible_crucial=args.ansible_crucial
386419

420+
# Resolve the --remove_unreachable flag (consulted by reconfigure()),
# falling back to False when the parsed value is None.
remove_unreachable = False if args.remove_unreachable is None else args.remove_unreachable
424+
387425
if user_logging:
388426
config_oci = oci.config.from_file()
389427
computeClient = oci.core.ComputeClient(config_oci)

0 commit comments

Comments
 (0)