@@ -455,14 +455,169 @@ def retrieve_worker_results(self, start_date, end_date, id_list=[], worker_list=
455455 frequency )
456456 return result
457457
def retrieve_ingestion_results(self, start_date, end_date, frequency='daily'):
    """Retrieve EPV ingestion results from the RDB.

    :param start_date: inclusive lower bound (``YYYY-MM-DD``) on ANALYSES.STARTED_AT
    :param end_date: exclusive upper bound (``YYYY-MM-DD``) on ANALYSES.STARTED_AT
    :param frequency: 'daily' or 'monthly'; forwarded to the normalizer
    :return: dict with JSON-encoded row lists under 'EPV_INGESTION_DATA' and
             'EPV_GRAPH_FAILED_DATA'
    """
    result = {}

    # All EPVs whose ingestion was started in the given window.
    # The dates are passed as bound query parameters instead of being spliced
    # into the SQL text with %-formatting: that avoids SQL injection and the
    # fragile hand-written quoting the old code relied on.
    query = sql.SQL('SELECT EC.NAME, PK.NAME, VR.IDENTIFIER FROM ANALYSES AN,'
                    ' PACKAGES PK, VERSIONS VR, ECOSYSTEMS EC WHERE'
                    ' AN.STARTED_AT >= %s AND AN.STARTED_AT < %s'
                    ' AND AN.VERSION_ID = VR.ID AND VR.PACKAGE_ID = PK.ID'
                    ' AND PK.ECOSYSTEM_ID = EC.ID')
    self.cursor.execute(query.as_string(self.conn), (start_date, end_date))
    # Rows are JSON-encoded here and decoded again by the consumer; kept for
    # interface compatibility with normalize_ingestion_data / get_report.
    result['EPV_INGESTION_DATA'] = json.dumps(self.cursor.fetchall())

    # EPVs from the same window that failed to sync into the graph DB.
    query = sql.SQL('SELECT EC.NAME, PK.NAME, VR.IDENTIFIER FROM ANALYSES AN,'
                    ' PACKAGES PK, VERSIONS VR, ECOSYSTEMS EC WHERE'
                    ' AN.STARTED_AT >= %s AND AN.STARTED_AT < %s'
                    ' AND AN.VERSION_ID = VR.ID AND VR.PACKAGE_ID = PK.ID'
                    ' AND PK.ECOSYSTEM_ID = EC.ID AND VR.SYNCED2GRAPH = %s')
    self.cursor.execute(query.as_string(self.conn),
                        (start_date, end_date, 'FALSE'))
    result['EPV_GRAPH_FAILED_DATA'] = json.dumps(self.cursor.fetchall())

    self.normalize_ingestion_data(start_date, end_date, result, frequency)
    return result
488+
def normalize_ingestion_data(self, start_date, end_date, ingestion_data, frequency='daily'):
    """Normalize raw EPV ingestion data into a report and store it on S3.

    :param start_date: report window start (``YYYY-MM-DD``)
    :param end_date: report window end (``YYYY-MM-DD``); also names the report
    :param ingestion_data: dict with JSON-encoded row lists under
           'EPV_INGESTION_DATA' and 'EPV_GRAPH_FAILED_DATA', each row being
           (ecosystem, package, version)
    :param frequency: 'daily' -> report named YYYY-MM-DD, 'monthly' -> YYYY-MM
    :return: the generated report dict (also uploaded to the report bucket)
    """
    report_type = 'ingestion-data'
    name_format = '%Y-%m' if frequency == 'monthly' else '%Y-%m-%d'
    report_name = dt.strptime(end_date, '%Y-%m-%d').strftime(name_format)

    template = {
        'report': {
            'from': start_date,
            'to': end_date,
            'generated_on': dt.now().isoformat('T')
        },
        'ingestion_summary': {},
        'ingestion_details': []
    }

    # Insertion order matters: it fixes the order of 'ingestion_details'.
    ecosystems = ('npm', 'maven', 'python')

    def _bucketize(raw_json):
        """Group rows by ecosystem; return (counts, 'pkg::version' lists).

        'all' counts every row, including unrecognized ecosystems, which are
        otherwise skipped — this mirrors the original report's semantics.
        """
        counts = {'all': 0}
        counts.update({eco: 0 for eco in ecosystems})
        epvs = {eco: [] for eco in ecosystems}
        for row in json.loads(raw_json):
            counts['all'] += 1
            eco = row[0]
            if eco in epvs:
                counts[eco] += 1
                epvs[eco].append(row[1] + '::' + row[2])
        return counts, epvs

    # Marshal total ingested and failed-to-sync EPVs per ecosystem.
    all_deps_count, all_epv_list = _bucketize(ingestion_data['EPV_INGESTION_DATA'])
    failed_deps_count, failed_epv_list = _bucketize(ingestion_data['EPV_GRAPH_FAILED_DATA'])

    # Per-ecosystem ingestion details.
    for eco in ecosystems:
        template['ingestion_details'].append({
            'ecosystem': eco,
            # NOTE(review): 'ingested_epvs' is a single-element nested list
            # ([[...]]) while 'failed_epvs' is flat; kept as-is to preserve
            # the existing report schema — confirm with report consumers
            # before flattening.
            'ingested_epvs': [all_epv_list[eco]],
            'failed_epvs': failed_epv_list[eco]
        })

    # Per-ecosystem ingestion statistics.
    summary = {'total_epv_ingestion_count': all_deps_count['all']}
    for eco in ecosystems:
        summary[eco] = {
            'epv_ingestion_count': all_deps_count[eco],
            'epv_successfully_ingested_count':
                all_deps_count[eco] - failed_deps_count[eco],
            'failed_epv_ingestion_count': failed_deps_count[eco],
            'unknown_ingestion_triggered': True
        }
    template['ingestion_summary'] = summary

    # Save the final report in the relevant S3 bucket; a failed upload is
    # logged but does not prevent the report from being returned.
    try:
        obj_key = '{depl_prefix}/{type}/{report_name}.json'.format(
            depl_prefix=self.s3.deployment_prefix, type=report_type, report_name=report_name
        )
        self.s3.store_json_content(content=template, obj_key=obj_key,
                                   bucket_name=self.s3.report_bucket_name)
    except Exception as e:
        logger.exception('Unable to store the report on S3. Reason: %r' % e)
    return template
601+
def get_report(self, start_date, end_date, frequency='daily'):
    """Generate the stacks report.

    :param start_date: report window start (``YYYY-MM-DD``)
    :param end_date: report window end (``YYYY-MM-DD``)
    :param frequency: 'daily' (also triggers the ingestion report) or 'monthly'
    :return: tuple of (worker results dict, or False when no stack analyses
             were found; bool indicating whether ingestion data was found)
    """
    ids = self.retrieve_stack_analyses_ids(start_date, end_date)

    # Daily runs also produce the ingestion report; the flag records whether
    # any EPV ingestion data existed in the window.
    ingestion_results = False
    if frequency == 'daily':
        result = self.retrieve_ingestion_results(start_date, end_date)
        ingestion_results = bool(json.loads(result['EPV_INGESTION_DATA']))
        if not ingestion_results:
            logger.error('No ingestion data found from {s} to {e} to generate report'
                         .format(s=start_date, e=end_date))

    if ids:
        worker_result = self.retrieve_worker_results(
            start_date, end_date, ids, ['stack_aggregator_v2'], frequency)
        return worker_result, ingestion_results

    logger.error('No stack analyses found from {s} to {e} to generate an aggregated report'
                 .format(s=start_date, e=end_date))
    return False, ingestion_results
0 commit comments