@@ -206,23 +206,36 @@ def startCluster():
206206 requestInfo = ec2client .request_spot_fleet (SpotFleetRequestConfig = spotfleetConfig )
207207 print 'Request in process. Wait until your machines are available in the cluster.'
208208 print 'SpotFleetRequestId' ,requestInfo ['SpotFleetRequestId' ]
209+ starttime = str (int (time .time ()* 1000 ))
209210 createMonitor = open ('files/' + APP_NAME + 'SpotFleetRequestId.json' ,'w' )
210211 createMonitor .write ('{"MONITOR_FLEET_ID" : "' + requestInfo ['SpotFleetRequestId' ]+ '",\n ' )
211212 createMonitor .write ('"MONITOR_APP_NAME" : "' + APP_NAME + '",\n ' )
212213 createMonitor .write ('"MONITOR_ECS_CLUSTER" : "' + ECS_CLUSTER + '",\n ' )
213214 createMonitor .write ('"MONITOR_QUEUE_NAME" : "' + SQS_QUEUE_NAME + '",\n ' )
214215 createMonitor .write ('"MONITOR_BUCKET_NAME" : "' + AWS_BUCKET + '",\n ' )
215216 createMonitor .write ('"MONITOR_LOG_GROUP_NAME" : "' + LOG_GROUP_NAME + '",\n ' )
216- createMonitor .write ('"MONITOR_START_TIME" : "' + str ( int ( time . time () * 1000 )) + '"}\n ' )
217+ createMonitor .write ('"MONITOR_START_TIME" : "' + starttime + '"}\n ' )
217218 createMonitor .close ()
218219
219220
220221
221222
222223 # Step 2: wait until instances in the cluster are available
223224 cmd = 'aws ec2 describe-spot-fleet-instances --spot-fleet-request-id ' + requestInfo ['SpotFleetRequestId' ]
225+ cmd_tbl = 'aws ec2 describe-spot-fleet-request-history --spot-fleet-request-id ' + requestInfo ['SpotFleetRequestId' ] + \
226+ ' --event-type error --start-time ' + datetime .date .isoformat (datetime .date .today ())
224227 status = getAWSJsonOutput (cmd )
225228 while len (status ['ActiveInstances' ]) < CLUSTER_MACHINES :
229+ # First check to make sure there's not a problem
230+ errorcheck = getAWSJsonOutput (cmd_tbl )
231+ if len (errorcheck ['HistoryRecords' ]) != 0 :
232+ print 'Your spot fleet request is causing an error and is now being cancelled. Please check your configuration and try again'
233+ for eacherror in errorcheck ['HistoryRecords' ]:
234+ print eacherror ['EventInformation' ]['EventSubType' ] + ' : ' + eacherror ['EventInformation' ]['EventDescription' ]
235+ cmd = 'aws ec2 cancel-spot-fleet-requests --spot-fleet-request-ids ' + requestInfo ['SpotFleetRequestId' ] + ' --terminate-instances'
236+ result = getAWSJsonOutput (cmd )
237+ return
238+ # If everything seems good, just bide your time until you're ready to go
226239 time .sleep (20 )
227240 print '.' ,
228241 status = getAWSJsonOutput (cmd )
0 commit comments