Skip to content
This repository was archived by the owner on May 3, 2023. It is now read-only.

Commit f7ed5d6

Browse files
authored
Merge pull request #5 from pwillie/instancecount
Exclude terminated nodes from get_num_of_instances calculation
2 parents fdde08f + 4bc8294 commit f7ed5d6

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

eks_node_rollout/eks_node_rollout.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def get_latest_instance(asg_client, ec2_client, asg_name, add_time, dry_run=True
131131
return latest_instance
132132

133133

134-
def get_num_of_instances(asg_client, ec2_client, asg_name):
134+
def get_num_of_instances(asg_client, ec2_client, asg_name, exclude_ids):
135135
"""Returns number of instances in an ASG"""
136136

137137
instances = []
@@ -141,7 +141,7 @@ def get_num_of_instances(asg_client, ec2_client, asg_name):
141141
asg_name
142142
]
143143
)
144-
instance_ids = [instance["InstanceId"] for instance in response["AutoScalingGroups"][0]["Instances"]]
144+
instance_ids = [instance["InstanceId"] for instance in response["AutoScalingGroups"][0]["Instances"] if instance["InstanceId"] not in exclude_ids]
145145
response = ec2_client.describe_instances(InstanceIds=instance_ids)
146146
for reservation in response["Reservations"]:
147147
for instance in reservation["Instances"]:
@@ -262,11 +262,12 @@ def rollout_nodes(cluster_name, drain_timeout, dry_run, debug):
262262
disable_autoscaling(asg_client=asg_client, asg_name=asg_name, dry_run=dry_run)
263263

264264
try:
265+
terminated_ids = []
265266
for instance in instances:
266267
before_instance_count = 0
267268
after_instance_count = 0
268269

269-
before_instance_count = get_num_of_instances(asg_client=asg_client, ec2_client=ec2_client, asg_name=asg_name)
270+
before_instance_count = get_num_of_instances(asg_client=asg_client, ec2_client=ec2_client, asg_name=asg_name, exclude_ids=terminated_ids)
270271
add_time = datetime.datetime.now(datetime.timezone.utc)
271272
add_node(asg_client=asg_client, asg_name=asg_name, dry_run=dry_run)
272273
logging.info(f'Waiting for instance to be created...')
@@ -279,7 +280,7 @@ def rollout_nodes(cluster_name, drain_timeout, dry_run, debug):
279280
time.sleep(40) # instance will never be ready before this, don't bother polling yet
280281
wait_for_ready_node(latest_node_name)
281282
logging.info(f'Node {latest_node_name} is now "Ready".')
282-
after_instance_count = get_num_of_instances(asg_client=asg_client, ec2_client=ec2_client, asg_name=asg_name)
283+
after_instance_count = get_num_of_instances(asg_client=asg_client, ec2_client=ec2_client, asg_name=asg_name, exclude_ids=terminated_ids)
283284

284285
# because get_latest_instance() doesn't necessarily return the instance launched by add_node(), this is just a safety precaution to ensure we've actually launched a node
285286
logging.info(f"Had {before_instance_count} instances in {asg_name} before, now have {after_instance_count} instances")
@@ -292,6 +293,7 @@ def rollout_nodes(cluster_name, drain_timeout, dry_run, debug):
292293
print(output.stdout.decode().rstrip())
293294

294295
terminate_node(asg_client, instance["InstanceId"], dry_run)
296+
terminated_ids.append(instance["InstanceId"])
295297
except Exception:
296298
logging.critical(f"Failed to upgrade all nodes in {asg_name}.")
297299
raise

0 commit comments

Comments
 (0)