@@ -161,25 +161,60 @@ function create_and_attach_volume() {
161161 local availability_zone=$( get_metadata placement/availability-zone)
162162 local region=${availability_zone% ?}
163163
164- local attached_volumes=$(
165- aws ec2 describe-volumes \
164+ local max_attempts=10
165+ local attached_volumes=" "
166+
167+ for i in $( eval echo " {0..$max_attempts }" ) ; do
168+ attached_volumes=$(
169+ aws ec2 describe-volumes \
166170 --region $region \
167171 --filters " Name=attachment.instance-id,Values=$instance_id "
168- )
172+ )
169173
170- local created_volumes=$(
171- aws ec2 describe-volumes \
172- --region $region \
173- --filters " Name=tag:source-instance,Values=$instance_id "
174- )
174+ if [ $? -eq 0 ]; then
175+ break
176+ elif [ $i -eq $max_attempts ]; then
177+ logthis " Could not determine the number of attached_volumes after $i attempts. Last response was: $attached_volumes "
178+ break
179+ fi
180+ sleep $(( 2 ** i ))
181+ done
175182
176- local total_created_size=$(
177- aws ec2 describe-volumes \
178- --region $region \
179- --filters " Name=tag:source-instance,Values=$instance_id " \
180- --query ' sum(Volumes[].Size)' \
181- --output text
182- )
183+ local created_volumes=" "
184+ for i in $( eval echo " {0..$max_attempts }" ) ; do
185+ created_volumes=$(
186+ aws ec2 describe-volumes \
187+ --region $region \
188+ --filters " Name=tag:source-instance,Values=$instance_id "
189+ )
190+
191+ if [ $? -eq 0 ]; then
192+ break
193+ elif [ $i -eq $max_attempts ]; then
194+ logthis " Could not determine the number of created_volumes after $i attempts. Last response was: $created_volumes "
195+ break
196+ fi
197+ sleep $(( 2 ** i ))
198+ done
199+
200+ local total_created_size=" "
201+ for i in $( eval echo " {0..$max_attempts }" ) ; do
202+ total_created_size=$(
203+ aws ec2 describe-volumes \
204+ --region $region \
205+ --filters " Name=tag:source-instance,Values=$instance_id " \
206+ --query ' sum(Volumes[].Size)' \
207+ --output text
208+ )
209+
210+ if [ $? -eq 0 ]; then
211+ break
212+ elif [ $i -eq $max_attempts ]; then
213+ logthis " Could not determine the total_created_size after $i attempts. Last response was: $total_created_size "
214+ break
215+ fi
216+ sleep $(( 2 ** i ))
217+ done
183218
184219 # check how much EBS storage this instance has created
185220 if [ " $total_created_size " -ge " $MAX_TOTAL_EBS_SIZE " ]; then
@@ -209,14 +244,27 @@ function create_and_attach_volume() {
209244 if [ " $TYPE " == " io1" ]; then volume_opts=" $volume_opts --iops $IOPS " ; fi
210245 if [ " $ENCRYPTED " == " 1" ]; then volume_opts=" $volume_opts --encrypted" ; fi
211246 local timestamp=$( date " +%F %T UTC%z" ) # YYYY-mm-dd HH:MM:SS UTC+0000
212- local volume=$( \
213- aws ec2 create-volume \
214- --region $region \
215- --availability-zone $availability_zone \
216- $volume_opts \
217- --tag-specification " ResourceType=volume,Tags=[{Key=source-instance,Value=$instance_id },{Key=amazon-ebs-autoscale-creation-time,Value=$timestamp }]" \
218- 2> $tmpfile
219- )
247+
248+ local volume=" "
249+ for i in $( eval echo " {0..$max_attempts }" ) ; do
250+ local volume=$( \
251+ aws ec2 create-volume \
252+ --region $region \
253+ --availability-zone $availability_zone \
254+ $volume_opts \
255+ --tag-specification " ResourceType=volume,Tags=[{Key=source-instance,Value=$instance_id },{Key=amazon-ebs-autoscale-creation-time,Value=$timestamp }]" \
256+ 2> $tmpfile
257+ )
258+
259+ if [ $? -eq 0 ]; then
260+ break
261+ elif [ $i -eq $max_attempts ]; then
262+ logthis " Could not create a volume after $i attempts. Last response was: $volume "
263+ break
264+ fi
265+ sleep $(( 2 ** i ))
266+ done
267+
220268 local volume_id=` echo $volume | jq -r ' .VolumeId' `
221269
222270 if [ -z " $volume_id " ]; then
@@ -230,13 +278,20 @@ function create_and_attach_volume() {
230278
231279 logthis " created volume: $volume_id [ $volume_opts ]"
232280
233- aws ec2 wait volume-available --region $region --volume-ids $volume_id
234- logthis " volume $volume_id available"
281+ # In theory this shouldn't need to loop as aws ec2 wait will retry but I have seen it exceed request limits
282+ for i in {1..3} ; do
283+ if aws ec2 wait volume-available --region $region --volume-ids $volume_id ; then
284+ logthis " volume $volume_id available"
285+ break
286+ fi
287+ done
235288
236289 # Need to assure that the created volume is successfully attached to be
237290 # cost efficient. If attachment fails, delete the volume.
238291 set +e
239292 logthis " attaching volume $volume_id "
293+
294+ sleep 1
240295 aws ec2 attach-volume \
241296 --region $region \
242297 --device $device \
0 commit comments