@@ -455,14 +455,169 @@ def retrieve_worker_results(self, start_date, end_date, id_list=[], worker_list=
455455 frequency )
456456 return result
457457
def retrieve_ingestion_results(self, start_date, end_date, frequency='daily'):
    """Retrieve EPV ingestion results from the RDB.

    :param start_date: inclusive lower bound (``YYYY-MM-DD``) on ANALYSES.STARTED_AT
    :param end_date: exclusive upper bound (``YYYY-MM-DD``) on ANALYSES.STARTED_AT
    :param frequency: 'daily' or 'monthly'; forwarded to the normalizer
    :return: dict with JSON-encoded row lists under 'EPV_INGESTION_DATA' and
             'EPV_GRAPH_FAILED_DATA'
    """
    result = {}

    # All EPVs whose ingestion was started in the given window.
    # The dates are passed as bound query parameters instead of being spliced
    # into the SQL text with %-formatting: that avoids SQL injection and the
    # fragile hand-written quoting the old code relied on.
    query = sql.SQL('SELECT EC.NAME, PK.NAME, VR.IDENTIFIER FROM ANALYSES AN,'
                    ' PACKAGES PK, VERSIONS VR, ECOSYSTEMS EC WHERE'
                    ' AN.STARTED_AT >= %s AND AN.STARTED_AT < %s'
                    ' AND AN.VERSION_ID = VR.ID AND VR.PACKAGE_ID = PK.ID'
                    ' AND PK.ECOSYSTEM_ID = EC.ID')
    self.cursor.execute(query.as_string(self.conn), (start_date, end_date))
    # Rows are JSON-encoded here and decoded again by the consumer; kept for
    # interface compatibility with normalize_ingestion_data / get_report.
    result['EPV_INGESTION_DATA'] = json.dumps(self.cursor.fetchall())

    # EPVs from the same window that failed to sync into the graph DB.
    query = sql.SQL('SELECT EC.NAME, PK.NAME, VR.IDENTIFIER FROM ANALYSES AN,'
                    ' PACKAGES PK, VERSIONS VR, ECOSYSTEMS EC WHERE'
                    ' AN.STARTED_AT >= %s AND AN.STARTED_AT < %s'
                    ' AND AN.VERSION_ID = VR.ID AND VR.PACKAGE_ID = PK.ID'
                    ' AND PK.ECOSYSTEM_ID = EC.ID AND VR.SYNCED2GRAPH = %s')
    self.cursor.execute(query.as_string(self.conn),
                        (start_date, end_date, 'FALSE'))
    result['EPV_GRAPH_FAILED_DATA'] = json.dumps(self.cursor.fetchall())

    self.normalize_ingestion_data(start_date, end_date, result, frequency)
    return result
488+
def normalize_ingestion_data(self, start_date, end_date, ingestion_data, frequency='daily'):
    """Normalize raw EPV ingestion data into a report and store it on S3.

    :param start_date: report window start (``YYYY-MM-DD``)
    :param end_date: report window end (``YYYY-MM-DD``); also names the report
    :param ingestion_data: dict with JSON-encoded row lists under
           'EPV_INGESTION_DATA' and 'EPV_GRAPH_FAILED_DATA', each row being
           (ecosystem, package, version)
    :param frequency: 'daily' -> report named YYYY-MM-DD, 'monthly' -> YYYY-MM
    :return: the generated report dict (also uploaded to the report bucket)
    """
    report_type = 'ingestion-data'
    name_format = '%Y-%m' if frequency == 'monthly' else '%Y-%m-%d'
    report_name = dt.strptime(end_date, '%Y-%m-%d').strftime(name_format)

    template = {
        'report': {
            'from': start_date,
            'to': end_date,
            'generated_on': dt.now().isoformat('T')
        },
        'ingestion_summary': {},
        'ingestion_details': []
    }

    # Insertion order matters: it fixes the order of 'ingestion_details'.
    ecosystems = ('npm', 'maven', 'python')

    def _bucketize(raw_json):
        """Group rows by ecosystem; return (counts, 'pkg::version' lists).

        'all' counts every row, including unrecognized ecosystems, which are
        otherwise skipped — this mirrors the original report's semantics.
        """
        counts = {'all': 0}
        counts.update({eco: 0 for eco in ecosystems})
        epvs = {eco: [] for eco in ecosystems}
        for row in json.loads(raw_json):
            counts['all'] += 1
            eco = row[0]
            if eco in epvs:
                counts[eco] += 1
                epvs[eco].append(row[1] + '::' + row[2])
        return counts, epvs

    # Marshal total ingested and failed-to-sync EPVs per ecosystem.
    all_deps_count, all_epv_list = _bucketize(ingestion_data['EPV_INGESTION_DATA'])
    failed_deps_count, failed_epv_list = _bucketize(ingestion_data['EPV_GRAPH_FAILED_DATA'])

    # Per-ecosystem ingestion details.
    for eco in ecosystems:
        template['ingestion_details'].append({
            'ecosystem': eco,
            # NOTE(review): 'ingested_epvs' is a single-element nested list
            # ([[...]]) while 'failed_epvs' is flat; kept as-is to preserve
            # the existing report schema — confirm with report consumers
            # before flattening.
            'ingested_epvs': [all_epv_list[eco]],
            'failed_epvs': failed_epv_list[eco]
        })

    # Per-ecosystem ingestion statistics.
    summary = {'total_epv_ingestion_count': all_deps_count['all']}
    for eco in ecosystems:
        summary[eco] = {
            'epv_ingestion_count': all_deps_count[eco],
            'epv_successfully_ingested_count':
                all_deps_count[eco] - failed_deps_count[eco],
            'failed_epv_ingestion_count': failed_deps_count[eco],
            'unknown_ingestion_triggered': True
        }
    template['ingestion_summary'] = summary

    # Save the final report in the relevant S3 bucket; a failed upload is
    # logged but does not prevent the report from being returned.
    try:
        obj_key = '{depl_prefix}/{type}/{report_name}.json'.format(
            depl_prefix=self.s3.deployment_prefix, type=report_type, report_name=report_name
        )
        self.s3.store_json_content(content=template, obj_key=obj_key,
                                   bucket_name=self.s3.report_bucket_name)
    except Exception as e:
        logger.exception('Unable to store the report on S3. Reason: %r' % e)
    return template
601+
def get_report(self, start_date, end_date, frequency='daily'):
    """Generate the stacks report.

    :param start_date: report window start (``YYYY-MM-DD``)
    :param end_date: report window end (``YYYY-MM-DD``)
    :param frequency: 'daily' (also triggers the ingestion report) or 'monthly'
    :return: tuple of (worker results dict, or False when no stack analyses
             were found; bool indicating whether ingestion data was found)
    """
    ids = self.retrieve_stack_analyses_ids(start_date, end_date)

    # Daily runs also produce the ingestion report; the flag records whether
    # any EPV ingestion data existed in the window.
    ingestion_results = False
    if frequency == 'daily':
        result = self.retrieve_ingestion_results(start_date, end_date)
        ingestion_results = bool(json.loads(result['EPV_INGESTION_DATA']))
        if not ingestion_results:
            logger.error('No ingestion data found from {s} to {e} to generate report'
                         .format(s=start_date, e=end_date))

    if ids:
        worker_result = self.retrieve_worker_results(
            start_date, end_date, ids, ['stack_aggregator_v2'], frequency)
        return worker_result, ingestion_results

    logger.error('No stack analyses found from {s} to {e} to generate an aggregated report'
                 .format(s=start_date, e=end_date))
    return False, ingestion_results
0 commit comments