Skip to content

Commit f2d8387

Browse files
authored
Merge pull request #33 from animuk/master
Ingestion Report Added
2 parents a5a734c + 6d47af7 commit f2d8387

File tree

4 files changed

+176
-6
lines changed

4 files changed

+176
-6
lines changed

f8a_report/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@ def main():
2323

2424
start_date = (today - timedelta(days=1)).strftime('%Y-%m-%d')
2525
end_date = (today - timedelta(days=1)).strftime('%Y-%m-%d')
26-
daily_response = r.get_report(start_date, end_date, 'daily')
26+
daily_response, ingestion_results = r.get_report(start_date, end_date, 'daily')
2727
logger.debug('Daily report data from {s} to {e}'.format(s=start_date, e=end_date))
2828
logger.debug(json.dumps(daily_response, indent=2))
29+
logger.debug(json.dumps(ingestion_results, indent=2))
2930

3031
if time_to_generate_monthly_report(today):
3132
last_day_of_prev_month = date(today.year, today.month, 1) - timedelta(days=1)

f8a_report/report_helper.py

Lines changed: 157 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,14 +455,169 @@ def retrieve_worker_results(self, start_date, end_date, id_list=[], worker_list=
455455
frequency)
456456
return result
457457

458+
def retrieve_ingestion_results(self, start_date, end_date, frequency='daily'):
    """Retrieve EPV ingestion results from RDB and normalize them into a report.

    Collects, for the [start_date, end_date) window:
    * every ecosystem/package/version (EPV) whose analysis started in the window
    * the subset of those EPVs that failed to sync into the graph DB

    :param start_date: inclusive lower bound of the window, 'YYYY-MM-DD'
    :param end_date: exclusive upper bound of the window, 'YYYY-MM-DD'
    :param frequency: 'daily' or 'monthly'; forwarded to normalize_ingestion_data
    :return: dict with JSON-encoded 'EPV_INGESTION_DATA' and
             'EPV_GRAPH_FAILED_DATA' result sets
    """
    result = {}

    # Number of EPVs ingested in the given window.
    # Dates are passed as bound parameters instead of %-interpolating them
    # into the SQL text, which avoids SQL injection and quoting bugs.
    query = sql.SQL('SELECT EC.NAME, PK.NAME, VR.IDENTIFIER FROM ANALYSES AN,'
                    ' PACKAGES PK, VERSIONS VR, ECOSYSTEMS EC WHERE'
                    ' AN.STARTED_AT >= %s AND AN.STARTED_AT < %s'
                    ' AND AN.VERSION_ID = VR.ID AND VR.PACKAGE_ID = PK.ID'
                    ' AND PK.ECOSYSTEM_ID = EC.ID')
    self.cursor.execute(query.as_string(self.conn), (start_date, end_date))
    result['EPV_INGESTION_DATA'] = json.dumps(self.cursor.fetchall())

    # Number of EPVs from the same window that failed ingesting into the graph.
    query = sql.SQL('SELECT EC.NAME, PK.NAME, VR.IDENTIFIER FROM ANALYSES AN,'
                    ' PACKAGES PK, VERSIONS VR, ECOSYSTEMS EC WHERE'
                    ' AN.STARTED_AT >= %s AND AN.STARTED_AT < %s'
                    ' AND AN.VERSION_ID = VR.ID AND VR.PACKAGE_ID = PK.ID'
                    ' AND PK.ECOSYSTEM_ID = EC.ID AND VR.SYNCED2GRAPH = %s')
    # Python False is adapted to a proper boolean literal by the DB driver.
    self.cursor.execute(query.as_string(self.conn), (start_date, end_date, False))
    result['EPV_GRAPH_FAILED_DATA'] = json.dumps(self.cursor.fetchall())

    self.normalize_ingestion_data(start_date, end_date, result, frequency)
    return result
488+
489+
def normalize_ingestion_data(self, start_date, end_date, ingestion_data, frequency='daily'):
    """Normalize EPV ingestion data into a report and store it on S3.

    :param start_date: report window start, 'YYYY-MM-DD'
    :param end_date: report window end, 'YYYY-MM-DD'
    :param ingestion_data: dict with JSON-encoded 'EPV_INGESTION_DATA' and
           'EPV_GRAPH_FAILED_DATA' lists of [ecosystem, package, version] rows
    :param frequency: 'daily' (report named YYYY-MM-DD) or 'monthly' (YYYY-MM)
    :return: the normalized report template
    """
    report_type = 'ingestion-data'
    name_format = '%Y-%m' if frequency == 'monthly' else '%Y-%m-%d'
    report_name = dt.strptime(end_date, '%Y-%m-%d').strftime(name_format)

    template = {
        'report': {
            'from': start_date,
            'to': end_date,
            'generated_on': dt.now().isoformat('T')
        },
        'ingestion_summary': {},
        'ingestion_details': []
    }

    ecosystems = ('npm', 'maven', 'python')
    all_deps_count = {'all': 0, 'npm': 0, 'maven': 0, 'python': 0}
    failed_deps_count = {'all': 0, 'npm': 0, 'maven': 0, 'python': 0}
    all_epv_list = {eco: [] for eco in ecosystems}
    failed_epv_list = {eco: [] for eco in ecosystems}

    # Marshal the total ingested EPV data according to the ecosystems.
    # Rows from unknown ecosystems only bump the 'all' counter.
    for eco, pkg, version in json.loads(ingestion_data['EPV_INGESTION_DATA']):
        all_deps_count['all'] += 1
        if eco in all_epv_list:
            all_deps_count[eco] += 1
            all_epv_list[eco].append(pkg + '::' + version)

    # Marshal the failed EPV data (not synced to graph) the same way.
    for eco, pkg, version in json.loads(ingestion_data['EPV_GRAPH_FAILED_DATA']):
        failed_deps_count['all'] += 1
        if eco in failed_epv_list:
            failed_deps_count[eco] += 1
            failed_epv_list[eco].append(pkg + '::' + version)

    # Per-ecosystem ingestion details.
    # Fix: 'ingested_epvs' used to be built with list.append(list), producing a
    # one-element nested list while 'failed_epvs' was flat; both are now flat.
    for eco in all_epv_list:
        template['ingestion_details'].append({
            'ecosystem': eco,
            'ingested_epvs': all_epv_list[eco],
            'failed_epvs': failed_epv_list[eco]
        })

    # Per-ecosystem ingestion statistics.
    template['ingestion_summary'] = {
        'total_epv_ingestion_count': all_deps_count['all']
    }
    for eco in ecosystems:
        template['ingestion_summary'][eco] = {
            'epv_ingestion_count': all_deps_count[eco],
            'epv_successfully_ingested_count':
                all_deps_count[eco] - failed_deps_count[eco],
            'failed_epv_ingestion_count': failed_deps_count[eco],
            'unknown_ingestion_triggered': True
        }

    # Save the final report in the relevant S3 bucket; a storage failure is
    # logged but does not prevent returning the report to the caller.
    try:
        obj_key = '{depl_prefix}/{type}/{report_name}.json'.format(
            depl_prefix=self.s3.deployment_prefix, type=report_type, report_name=report_name
        )
        self.s3.store_json_content(content=template, obj_key=obj_key,
                                   bucket_name=self.s3.report_bucket_name)
    except Exception as e:
        logger.exception('Unable to store the report on S3. Reason: %r', e)
    return template
601+
458602
def get_report(self, start_date, end_date, frequency='daily'):
    """Generate the stacks report and, for daily runs, the ingestion report.

    :param start_date: report window start, 'YYYY-MM-DD'
    :param end_date: report window end, 'YYYY-MM-DD'
    :param frequency: 'daily' or 'monthly'
    :return: tuple (worker_result or False, ingestion_results flag) where the
             flag records whether any ingestion data was found in the window
    """
    ids = self.retrieve_stack_analyses_ids(start_date, end_date)

    # The ingestion report is generated only for daily runs.
    ingestion_results = False
    if frequency == 'daily':
        result = self.retrieve_ingestion_results(start_date, end_date)
        epv_data = json.loads(result['EPV_INGESTION_DATA'])
        if epv_data:
            ingestion_results = True
        else:
            logger.error('No ingestion data found from {s} to {e} to generate report'
                         .format(s=start_date, e=end_date))

    if not ids:
        logger.error('No stack analyses found from {s} to {e} to generate an aggregated report'
                     .format(s=start_date, e=end_date))
        return False, ingestion_results

    worker_result = self.retrieve_worker_results(
        start_date, end_date, ids, ['stack_aggregator_v2'], frequency)
    return worker_result, ingestion_results

tests/data/ingestiondata.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"EPV_INGESTION_DATA": "[[\"maven\", \"software.amazon.awscdk:dms\", \"0.21.0\"]]", "EPV_GRAPH_FAILED_DATA": "[[\"maven\", \"software.amazon.awscdk:dms\", \"0.21.0\"]]"}

tests/test_stack_report_helper.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
with open('tests/data/stackdata.json', 'r') as f:
1616
stackdata = f.read()
1717

18+
# Shared fixture: EPV ingestion payload consumed by the ingestion-report tests.
# json.load reads and parses inside the context manager in one step.
with open('tests/data/ingestiondata.json', 'r') as f:
    ingestiondata = json.load(f)
21+
1822

1923
def test_validate_and_process_date_success():
2024
"""Test the success scenario of the function validate_and_process_date."""
@@ -229,19 +233,28 @@ def test_normalize_worker_data_no_stack_aggregator(_mock_count):
229233
@mock.patch('f8a_report.report_helper.ReportHelper.retrieve_stack_analyses_ids', return_value=['1'])
230234
def test_get_report(_mock1, _mock2):
231235
"""Test sucess Get Report."""
232-
res = r.get_report('2018-10-10', '2018-10-18')
236+
res, ing_res = r.get_report('2018-10-10', '2018-10-18')
233237
assert res is True
234238

235239

236240
@mock.patch('f8a_report.report_helper.ReportHelper.retrieve_worker_results', return_value=True)
@mock.patch('f8a_report.report_helper.ReportHelper.retrieve_stack_analyses_ids', return_value=[])
@mock.patch('f8a_report.report_helper.ReportHelper.retrieve_ingestion_results',
            return_value=ingestiondata)
def test_get_report_failure(_mock1, _mock2, _mock3):
    """Test the failure path of get_report when no stack analyses ids exist.

    Renamed from test_get_report: the old name shadowed the success test of
    the same name, so pytest collected only one of the two.
    """
    res, ing_res = r.get_report('2018-10-10', '2018-10-18')
    assert res is False
242248

243249

244250
def test_retrieve_worker_results():
    """Verify retrieve_worker_results returns an empty dict when nothing matches."""
    worker_results = r.retrieve_worker_results('2018-10-10', '2018-10-18', ['1', '2'], [])
    assert worker_results == {}
254+
255+
256+
@mock.patch('f8a_report.report_helper.S3Helper.store_json_content', return_value=True)
def test_normalize_ingestion_data(_mock1):
    """Test the success scenario of the function normalize_ingestion_data."""
    resp = r.normalize_ingestion_data('2018-10-10', '2018-10-18', ingestiondata, 'daily')
    assert resp is not None

0 commit comments

Comments
 (0)