Skip to content

Commit 3e4f633

Browse files
authored
Report errors and return if spot fleet config is bad (#69)
1 parent e1eaf95 commit 3e4f633

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

run.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,23 +206,36 @@ def startCluster():
206206
requestInfo = ec2client.request_spot_fleet(SpotFleetRequestConfig=spotfleetConfig)
207207
print 'Request in process. Wait until your machines are available in the cluster.'
208208
print 'SpotFleetRequestId',requestInfo['SpotFleetRequestId']
209+
starttime=str(int(time.time()*1000))
209210
createMonitor=open('files/' + APP_NAME + 'SpotFleetRequestId.json','w')
210211
createMonitor.write('{"MONITOR_FLEET_ID" : "'+requestInfo['SpotFleetRequestId']+'",\n')
211212
createMonitor.write('"MONITOR_APP_NAME" : "'+APP_NAME+'",\n')
212213
createMonitor.write('"MONITOR_ECS_CLUSTER" : "'+ECS_CLUSTER+'",\n')
213214
createMonitor.write('"MONITOR_QUEUE_NAME" : "'+SQS_QUEUE_NAME+'",\n')
214215
createMonitor.write('"MONITOR_BUCKET_NAME" : "'+AWS_BUCKET+'",\n')
215216
createMonitor.write('"MONITOR_LOG_GROUP_NAME" : "'+LOG_GROUP_NAME+'",\n')
216-
createMonitor.write('"MONITOR_START_TIME" : "'+str(int(time.time()*1000))+'"}\n')
217+
createMonitor.write('"MONITOR_START_TIME" : "'+ starttime+'"}\n')
217218
createMonitor.close()
218219

219220

220221

221222

222223
# Step 2: wait until instances in the cluster are available
223224
cmd = 'aws ec2 describe-spot-fleet-instances --spot-fleet-request-id ' + requestInfo['SpotFleetRequestId']
225+
cmd_tbl='aws ec2 describe-spot-fleet-request-history --spot-fleet-request-id ' + requestInfo['SpotFleetRequestId'] + \
226+
' --event-type error --start-time '+ datetime.date.isoformat(datetime.date.today())
224227
status = getAWSJsonOutput(cmd)
225228
while len(status['ActiveInstances']) < CLUSTER_MACHINES:
229+
# First check to make sure there's not a problem
230+
errorcheck = getAWSJsonOutput(cmd_tbl)
231+
if len(errorcheck['HistoryRecords']) != 0:
232+
print 'Your spot fleet request is causing an error and is now being cancelled. Please check your configuration and try again'
233+
for eacherror in errorcheck['HistoryRecords']:
234+
print eacherror['EventInformation']['EventSubType'] + ' : ' + eacherror['EventInformation']['EventDescription']
235+
cmd = 'aws ec2 cancel-spot-fleet-requests --spot-fleet-request-ids ' + requestInfo['SpotFleetRequestId'] + ' --terminate-instances'
236+
result = getAWSJsonOutput(cmd)
237+
return
238+
# If everything seems good, just bide your time until you're ready to go
226239
time.sleep(20)
227240
print '.',
228241
status = getAWSJsonOutput(cmd)

0 commit comments

Comments
 (0)