Skip to content
This repository was archived by the owner on Aug 22, 2024. It is now read-only.

Commit 651b216

Browse files
Improvements to threshold calculation and ability to cope with API throttling (#14)
* Filter attached volumes to count only those added by ebs-autoscale.
* Sleep to avoid RequestLimitExceeded on DescribeVolumes, which is causing failures to attach the drive.
* Further changes to avoid request limits.

Co-authored-by: Mark Schreiber <mrschre@amazon.com>
1 parent c43a555 commit 651b216

File tree

2 files changed

+114
-33
lines changed

2 files changed

+114
-33
lines changed

bin/create-ebs-volume

Lines changed: 80 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -161,25 +161,60 @@ function create_and_attach_volume() {
161161
local availability_zone=$(get_metadata placement/availability-zone)
162162
local region=${availability_zone%?}
163163

164-
local attached_volumes=$(
165-
aws ec2 describe-volumes \
164+
local max_attempts=10
165+
local attached_volumes=""
166+
167+
for i in $(eval echo "{0..$max_attempts}") ; do
168+
attached_volumes=$(
169+
aws ec2 describe-volumes \
166170
--region $region \
167171
--filters "Name=attachment.instance-id,Values=$instance_id"
168-
)
172+
)
169173

170-
local created_volumes=$(
171-
aws ec2 describe-volumes \
172-
--region $region \
173-
--filters "Name=tag:source-instance,Values=$instance_id"
174-
)
174+
if [ $? -eq 0 ]; then
175+
break
176+
elif [ $i -eq $max_attempts ]; then
177+
logthis "Could not determine the number of attached_volumes after $i attempts. Last response was: $attached_volumes"
178+
break
179+
fi
180+
sleep $(( 2 ** i ))
181+
done
175182

176-
local total_created_size=$(
177-
aws ec2 describe-volumes \
178-
--region $region \
179-
--filters "Name=tag:source-instance,Values=$instance_id" \
180-
--query 'sum(Volumes[].Size)' \
181-
--output text
182-
)
183+
local created_volumes=""
184+
for i in $(eval echo "{0..$max_attempts}") ; do
185+
created_volumes=$(
186+
aws ec2 describe-volumes \
187+
--region $region \
188+
--filters "Name=tag:source-instance,Values=$instance_id"
189+
)
190+
191+
if [ $? -eq 0 ]; then
192+
break
193+
elif [ $i -eq $max_attempts ]; then
194+
logthis "Could not determine the number of created_volumes after $i attempts. Last response was: $created_volumes"
195+
break
196+
fi
197+
sleep $(( 2 ** i ))
198+
done
199+
200+
local total_created_size=""
201+
for i in $(eval echo "{0..$max_attempts}") ; do
202+
total_created_size=$(
203+
aws ec2 describe-volumes \
204+
--region $region \
205+
--filters "Name=tag:source-instance,Values=$instance_id" \
206+
--query 'sum(Volumes[].Size)' \
207+
--output text
208+
)
209+
210+
if [ $? -eq 0 ]; then
211+
break
212+
elif [ $i -eq $max_attempts ]; then
213+
logthis "Could not determine the total_created_size after $i attempts. Last response was: $total_created_size"
214+
break
215+
fi
216+
sleep $(( 2 ** i ))
217+
done
183218

184219
# check how much EBS storage this instance has created
185220
if [ "$total_created_size" -ge "$MAX_TOTAL_EBS_SIZE" ]; then
@@ -209,14 +244,27 @@ function create_and_attach_volume() {
209244
if [ "$TYPE" == "io1" ]; then volume_opts="$volume_opts --iops $IOPS"; fi
210245
if [ "$ENCRYPTED" == "1" ]; then volume_opts="$volume_opts --encrypted"; fi
211246
local timestamp=$(date "+%F %T UTC%z") # YYYY-mm-dd HH:MM:SS UTC+0000
212-
local volume=$(\
213-
aws ec2 create-volume \
214-
--region $region \
215-
--availability-zone $availability_zone \
216-
$volume_opts \
217-
--tag-specification "ResourceType=volume,Tags=[{Key=source-instance,Value=$instance_id},{Key=amazon-ebs-autoscale-creation-time,Value=$timestamp}]" \
218-
2> $tmpfile
219-
)
247+
248+
local volume=""
249+
for i in $(eval echo "{0..$max_attempts}") ; do
250+
local volume=$(\
251+
aws ec2 create-volume \
252+
--region $region \
253+
--availability-zone $availability_zone \
254+
$volume_opts \
255+
--tag-specification "ResourceType=volume,Tags=[{Key=source-instance,Value=$instance_id},{Key=amazon-ebs-autoscale-creation-time,Value=$timestamp}]" \
256+
2> $tmpfile
257+
)
258+
259+
if [ $? -eq 0 ]; then
260+
break
261+
elif [ $i -eq $max_attempts ]; then
262+
logthis "Could not create a volume after $i attempts. Last response was: $volume"
263+
break
264+
fi
265+
sleep $(( 2 ** i ))
266+
done
267+
220268
local volume_id=`echo $volume | jq -r '.VolumeId'`
221269

222270
if [ -z "$volume_id" ]; then
@@ -230,13 +278,20 @@ function create_and_attach_volume() {
230278

231279
logthis "created volume: $volume_id [ $volume_opts ]"
232280

233-
aws ec2 wait volume-available --region $region --volume-ids $volume_id
234-
logthis "volume $volume_id available"
281+
# In theory this shouldn't need to loop as aws ec2 wait will retry but I have seen it exceed request limits
282+
for i in {1..3} ; do
283+
if aws ec2 wait volume-available --region $region --volume-ids $volume_id; then
284+
logthis "volume $volume_id available"
285+
break
286+
fi
287+
done
235288

236289
# Need to assure that the created volume is successfully attached to be
237290
# cost efficient. If attachment fails, delete the volume.
238291
set +e
239292
logthis "attaching volume $volume_id"
293+
294+
sleep 1
240295
aws ec2 attach-volume \
241296
--region $region \
242297
--device $device \

bin/ebs-autoscale

Lines changed: 34 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -56,14 +56,40 @@ until [ -d "${MOUNTPOINT}" ]; do
5656
done
5757

5858
get_num_devices() {
# Prints a count of EBS volumes attached to $INSTANCE_ID, taken from `aws ec2 describe-volumes` piped through jq.
# NOTE(review): this span is a rendered diff hunk - lines following an "NN-" marker appear to be the removed
# implementation and lines following an "NN+" marker the added one; confirm against the repository.
59-
# Determine the number of volumes attached to this instance (no tag filter: every attached volume is counted).
60-
local attached_volumes=$(
61-
aws ec2 describe-volumes \
62-
--region $AWS_REGION \
63-
--filters Name=attachment.instance-id,Values=$INSTANCE_ID
64-
)
65-
66-
echo "`echo $attached_volumes | jq '.Volumes | length'`"
59+
# The create-ebs-volume script adds this tag to every volume it creates for ebs-autoscale; its presence marks
60+
# a volume that was added by auto-expansion rather than an EBS volume attached for other reasons or at startup.
# NOTE(review): TAG is assigned without `local`, so it is also set outside this function.
61+
TAG=amazon-ebs-autoscale-creation-time
62+
63+
# Determine the number of volumes attached by ebs-autoscale on this instance. Volumes attached in other ways are
64+
# excluded because they aren't relevant to autoscaling.
65+
local attached_volumes=""
66+
local max_attempts=5
67+
68+
# Retry with backoff (sleep 2**i seconds between tries) until attached_volumes is >= 0, hopefully avoiding request
69+
# limits and eventually getting a usable value. The >= 0 test is really a check that jq returned an integer.
# NOTE(review): i runs from 0 to max_attempts inclusive, so up to max_attempts + 1 describe-volumes calls are made.
70+
for i in $(eval echo "{0..$max_attempts}") ; do
71+
local attached_volumes_response=""
72+
73+
attached_volumes_response=$(
74+
aws ec2 describe-volumes \
75+
--region $AWS_REGION \
76+
--filters Name=attachment.instance-id,Values=$INSTANCE_ID Name=tag-key,Values=$TAG
77+
)
78+
79+
attached_volumes=$(echo "$attached_volumes_response" | jq '.Volumes | length')
80+
if [ "$attached_volumes" -ge 0 ]; then
81+
break
82+
fi
83+
84+
if [ $i -eq $max_attempts ] ; then
85+
logthis "Could not determine the number of attached_volumes after $i attempts. Last response was: $attached_volumes_response"
86+
break
87+
fi
88+
89+
sleep $(( 2 ** i ))
90+
done
91+
92+
echo "$attached_volumes"
6793
}
6894

6995
calc_threshold() {

0 commit comments

Comments
 (0)