Merge pull request #31 from samuzzal-choudhury/daily_job

miteshvp · web-flow · commit a5a734c426aa · 2019-03-08T10:48:53.000+05:30
Job scheduled daily instead of weekly
diff --git a/f8a_report/main.py b/f8a_report/main.py
@@ -11,7 +11,8 @@
 
 def time_to_generate_monthly_report(today):
     """Check whether it is the right time to generate monthly report."""
-    return today.day in (1, 2, 3, 4, 5, 6, 7)
+    # We will make three attempts to generate the monthly report every month
+    return today.day in (1, 2, 3)
 
 
 def main():
@@ -20,11 +21,11 @@ def main():
 
     today = dt.today()
 
-    start_date = (today - timedelta(days=7)).strftime('%Y-%m-%d')
+    start_date = (today - timedelta(days=1)).strftime('%Y-%m-%d')
     end_date = (today - timedelta(days=1)).strftime('%Y-%m-%d')
-    weekly_response = r.get_report(start_date, end_date, 'weekly')
-    logger.debug('Weekly report data from {s} to {e}'.format(s=start_date, e=end_date))
-    logger.debug(json.dumps(weekly_response, indent=2))
+    daily_response = r.get_report(start_date, end_date, 'daily')
+    logger.debug('Daily report data from {s} to {e}'.format(s=start_date, e=end_date))
+    logger.debug(json.dumps(daily_response, indent=2))
 
     if time_to_generate_monthly_report(today):
         last_day_of_prev_month = date(today.year, today.month, 1) - timedelta(days=1)
diff --git a/f8a_report/report_helper.py b/f8a_report/report_helper.py
@@ -128,12 +128,19 @@ def retrieve_stack_analyses_ids(self, start_date, end_date):
             raise ValueError("Invalid date format")
 
         # Avoiding SQL injection
-        query = sql.SQL('SELECT {} FROM {} WHERE {} BETWEEN \'%s\' AND \'%s\'').format(
-            sql.Identifier('id'), sql.Identifier('stack_analyses_request'),
-            sql.Identifier('submitTime')
-        )
+        if start_date == end_date:
+            query = sql.SQL('SELECT {} FROM {} WHERE {} = \'%s\'').format(
+                sql.Identifier('id'), sql.Identifier('stack_analyses_request'),
+                sql.Identifier('submitTime')
+            )
+            self.cursor.execute(query.as_string(self.conn) % start_date)
+        else:
+            query = sql.SQL('SELECT {} FROM {} WHERE {} BETWEEN \'%s\' AND \'%s\'').format(
+                sql.Identifier('id'), sql.Identifier('stack_analyses_request'),
+                sql.Identifier('submitTime')
+            )
+            self.cursor.execute(query.as_string(self.conn) % (start_date, end_date))
 
-        self.cursor.execute(query.as_string(self.conn) % (start_date, end_date))
         rows = self.cursor.fetchall()
 
         id_list = []
@@ -262,7 +269,7 @@ def store_training_data(self, result):
                 self.s3.store_json_content(content=training_data, bucket_name=bucket_name,
                                            obj_key=obj_key)
 
-    def normalize_worker_data(self, start_date, end_date, stack_data, worker, frequency='weekly'):
+    def normalize_worker_data(self, start_date, end_date, stack_data, worker, frequency='daily'):
         """Normalize worker data for reporting."""
         total_stack_requests = {'all': 0, 'npm': 0, 'maven': 0}
         if frequency == 'monthly':
@@ -353,13 +360,19 @@ def normalize_worker_data(self, start_date, end_date, stack_data, worker, freque
                 'maven': self.populate_key_count(stacks_list['maven'])
             }
 
-            # Collate Data from Previous Month for Model Retraining
-            collated_data = self.collate_raw_data(unique_stacks_with_recurrence_count, frequency)
-
-            # Store ecosystem specific data to their respective Training Buckets
-            if frequency == 'weekly':
+            today = dt.today()
+            # Invoke this every Monday. In Python, Monday is 0 and Sunday is 6
+            if today.weekday() == 0:
+                # Collate Data from Previous Month for Model Retraining
+                collated_data = self.collate_raw_data(unique_stacks_with_recurrence_count,
+                                                      'weekly')
+                # Store ecosystem specific data to their respective Training Buckets
                 self.store_training_data(collated_data)
 
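+            # Monthly data collection on the 1st of every month. NOTE(review): `today.date` looks like the datetime method, so `== 1` would never be true; `today.day` is presumably intended - confirm.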
+            if today.date == 1:
+                self.collate_raw_data(unique_stacks_with_recurrence_count, 'monthly')
+
             unique_stacks_with_deps_count =\
                 self.set_unique_stack_deps_count(unique_stacks_with_recurrence_count)
 
@@ -420,7 +433,7 @@ def normalize_worker_data(self, start_date, end_date, stack_data, worker, freque
             return None
 
     def retrieve_worker_results(self, start_date, end_date, id_list=[], worker_list=[],
-                                frequency='weekly'):
+                                frequency='daily'):
         """Retrieve results for selected worker from RDB."""
         result = {}
         # convert the elements of the id_list to sql.Literal
@@ -442,7 +455,7 @@ def retrieve_worker_results(self, start_date, end_date, id_list=[], worker_list=
                                                         frequency)
         return result
 
-    def get_report(self, start_date, end_date, frequency='weekly'):
+    def get_report(self, start_date, end_date, frequency='daily'):
         """Generate the stacks report."""
         ids = self.retrieve_stack_analyses_ids(start_date, end_date)
         if len(ids) > 0:
diff --git a/openshift/template.yaml b/openshift/template.yaml
@@ -104,7 +104,7 @@ parameters:
   displayName: Schedule
   required: true
   name: CRON_SCHEDULE
-  value: "0 0 * * 0"
+  value: "0 0 * * *"
 
 - description: CPU request
   displayName: CPU request