 import json
+import commands
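+# NOTE: the Python 2 'commands' module is used here; it was removed in
+# Python 3, where subprocess.getoutput would be the drop-in replacement.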
 import multiprocessing
 import time
 import logging
@@ -368,43 +369,85 @@ def check_in_yarn(job_name):
         logging.debug(str(error_message))
     return run_app_info
 
-def spark_application(job_name):
+# pylint: disable=C0103
+
+def check_in_service_log(namespace, application, component_name):
+    '''
+    Check the service log in case the application failed to submit to YARN
+    '''
+    service_name = '%s-%s-%s' % (namespace, application, component_name)
+    (command, message, more_detail) = ('', '', '')
+    command = 'sudo journalctl -u %s.service' % service_name
+    out = commands.getoutput('%s -n 50' % command).split('\n')
+    more_detail = 'More details: execute "journalctl -u %s"' % service_name
+    for line in out:
+        if 'Exception:' in line:
+            message = '%s%s' % (line.split('Exception:')[0].split(' ')[-1], 'Exception')
+            break
+    if message == '':
+        message = more_detail
+    else:
+        message = '%s. %s' % (message, more_detail)
+    return 'FAILED_TO_SUBMIT_TO_YARN', message
+
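+# Illustrative note: for a journal line such as
+# '... Caused by: java.io.FileNotFoundException: /opt/app/app.jar', the parsing
+# above yields 'java.io.FileNotFoundException' (the last space-separated token
+# before 'Exception:', with the 'Exception' suffix re-appended).
+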
+def spark_yarn_handler(yarn_data):
+    '''
+    Handling Spark YARN data
+    '''
+    information = ''
+    aggregate_status = ''
+    yarnid = yarn_data['id']
+    if yarn_data['state'] == 'SUBMITTED' or yarn_data['state'] == 'ACCEPTED':
+        aggregate_status = yarn_data['state']
+        message = yarn_data['diagnostics'].split('Details :')[0].strip()
+        information = message
+    elif yarn_data['state'] == 'RUNNING':
+        spark_data = spark_job_handler(yarn_data['id'])
+        if spark_data['state'] == 'OK':
+            aggregate_status = 'RUNNING'
+        else:
+            aggregate_status = 'RUNNING_WITH_ERRORS'
+            information = spark_data['information']
+    elif yarn_data['finalStatus'] == 'SUCCEEDED':
+        aggregate_status = '%s_%s' % (yarn_data['state'], yarn_data['finalStatus'])
+    elif yarn_data['state'] == 'FINISHED' and (yarn_data['finalStatus'] == 'FAILED' or yarn_data['finalStatus'] == 'KILLED'):
+        aggregate_status = '%s_%s' % (yarn_data['state'], yarn_data['finalStatus'])
+        information = yarn_data['diagnostics']
+    elif yarn_data['finalStatus'] == 'FAILED' or yarn_data['finalStatus'] == 'KILLED':
+        aggregate_status = yarn_data['finalStatus']
+        information = yarn_data['diagnostics']
+    else:
+        aggregate_status = 'NOT_FOUND'
+        # default must be a string (not ['']) so that split(':') below is safe
+        message = yarn_data.get('RemoteException', {'message': ''}).\
+            get('message').split(':')
+        message[0] = ''
+        information = ''.join(message).strip()
+    return aggregate_status, yarnid, information
+
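+# spark_yarn_handler collapses YARN's (state, finalStatus) pair into one value,
+# e.g. ('FINISHED', 'SUCCEEDED') -> 'FINISHED_SUCCEEDED' and
+# ('FINISHED', 'KILLED') -> 'FINISHED_KILLED'; any unrecognised combination
+# maps to 'NOT_FOUND'.
+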
+def spark_application(job_name, application, component_name):
     """
     Handling SPARK Application
     """
     ret = {}
-    yarnid = ''
-    information = ''
+    check_in_service = False
+    (aggregate_status, yarnid, information) = ('', '', '')
+    status, timestamp = _HBASE.get_status_with_timestamp(application)
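+    # get_status_with_timestamp is presumed to return the application state
+    # last recorded in HBase along with when it was written, so it can be
+    # compared with YARN's 'startedTime' below.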
     yarn_data = check_in_yarn(job_name)
-    if yarn_data != None:
-        yarnid = yarn_data['id']
-        if yarn_data['state'] == 'SUBMITTED' or yarn_data['state'] == 'ACCEPTED':
-            aggregate_status = yarn_data['state']
-            message = yarn_data['diagnostics'].split('Details :')[0].strip()
-            information = message
-        elif yarn_data['state'] == 'RUNNING':
-            spark_data = spark_job_handler(yarn_data['id'])
-            if spark_data['state'] == 'OK':
-                aggregate_status = 'RUNNING'
-            else:
-                aggregate_status = 'RUNNING_WITH_ERRORS'
-                information = spark_data['information']
-        elif yarn_data['finalStatus'] == 'SUCCEEDED':
-            aggregate_status = '%s_%s' % (yarn_data['state'], yarn_data['finalStatus'])
-        elif yarn_data['state'] == 'FINISHED' and (yarn_data['finalStatus'] == 'FAILED' or yarn_data['finalStatus'] == 'KILLED'):
-            aggregate_status = '%s_%s' % (yarn_data['state'], yarn_data['finalStatus'])
-            information = yarn_data['diagnostics']
-        elif yarn_data['finalStatus'] == 'FAILED' or yarn_data['finalStatus'] == 'KILLED':
-            aggregate_status = yarn_data['finalStatus']
-            information = yarn_data['diagnostics']
+    if status == 'CREATED':
+        if yarn_data != None:
+            aggregate_status, yarnid, information = spark_yarn_handler(yarn_data)
         else:
-            aggregate_status = 'NOT_FOUND'
-            message = yarn_data.get('RemoteException', {'message': ['']}).\
-                get('message').split(':')
-            message[0] = ''
-            information = ''.join(message).strip()
+            aggregate_status = ApplicationState.CREATED
     else:
-        aggregate_status = ApplicationState.CREATED
+        if yarn_data != None:
+            if timestamp < yarn_data['startedTime']:
+                aggregate_status, yarnid, information = spark_yarn_handler(yarn_data)
+            else:
+                check_in_service = True
+        else:
+            check_in_service = True
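+    # If HBase says the application has been started but YARN has no attempt
+    # newer than that timestamp (or no record at all), the submission itself is
+    # taken to have failed, so the component's systemd journal is consulted.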
+    if check_in_service:
+        aggregate_status, information = check_in_service_log(CONFIG['environment']['namespace'], application, component_name)
     ret = {
         'aggregate_status': aggregate_status,
         'yarnId': yarnid,
@@ -431,7 +474,8 @@ def get_json(component_list, queue_obj):
                     (component[application][component_name]['job_handle'])})
             if 'sparkStreaming' in component_name:
                 ret[application].update({component_name: spark_application\
-                    (component[application][component_name]['component_job_name'])})
+                    (component[application][component_name]['component_job_name'], application,\
+                     component[application][component_name]['component_name'])})
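+                # application and component_name are what let spark_application
+                # fall back to the per-component systemd journal on failure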
     queue_obj.put([{process_name: ret}])
     logging.info('%s %s', 'Finished', process_name)