Skip to content

Commit a5a734c

Browse files
authored
Merge pull request #31 from samuzzal-choudhury/daily_job
Job scheduled daily instead of monthly
2 parents: 33795db + 936f1c0; commit: a5a734c

File tree

3 files changed

+33
-19
lines changed

3 files changed

+33
-19
lines changed

f8a_report/main.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@
1111

1212
def time_to_generate_monthly_report(today):
1313
"""Check whether it is the right time to generate monthly report."""
14-
return today.day in (1, 2, 3, 4, 5, 6, 7)
14+
# We will make three attempts to generate the monthly report every month
15+
return today.day in (1, 2, 3)
1516

1617

1718
def main():
@@ -20,11 +21,11 @@ def main():
2021

2122
today = dt.today()
2223

23-
start_date = (today - timedelta(days=7)).strftime('%Y-%m-%d')
24+
start_date = (today - timedelta(days=1)).strftime('%Y-%m-%d')
2425
end_date = (today - timedelta(days=1)).strftime('%Y-%m-%d')
25-
weekly_response = r.get_report(start_date, end_date, 'weekly')
26-
logger.debug('Weekly report data from {s} to {e}'.format(s=start_date, e=end_date))
27-
logger.debug(json.dumps(weekly_response, indent=2))
26+
daily_response = r.get_report(start_date, end_date, 'daily')
27+
logger.debug('Daily report data from {s} to {e}'.format(s=start_date, e=end_date))
28+
logger.debug(json.dumps(daily_response, indent=2))
2829

2930
if time_to_generate_monthly_report(today):
3031
last_day_of_prev_month = date(today.year, today.month, 1) - timedelta(days=1)

f8a_report/report_helper.py

Lines changed: 26 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -128,12 +128,19 @@ def retrieve_stack_analyses_ids(self, start_date, end_date):
128128
raise ValueError("Invalid date format")
129129

130130
# Avoiding SQL injection
131-
query = sql.SQL('SELECT {} FROM {} WHERE {} BETWEEN \'%s\' AND \'%s\'').format(
132-
sql.Identifier('id'), sql.Identifier('stack_analyses_request'),
133-
sql.Identifier('submitTime')
134-
)
131+
if start_date == end_date:
132+
query = sql.SQL('SELECT {} FROM {} WHERE {} = \'%s\'').format(
133+
sql.Identifier('id'), sql.Identifier('stack_analyses_request'),
134+
sql.Identifier('submitTime')
135+
)
136+
self.cursor.execute(query.as_string(self.conn) % start_date)
137+
else:
138+
query = sql.SQL('SELECT {} FROM {} WHERE {} BETWEEN \'%s\' AND \'%s\'').format(
139+
sql.Identifier('id'), sql.Identifier('stack_analyses_request'),
140+
sql.Identifier('submitTime')
141+
)
142+
self.cursor.execute(query.as_string(self.conn) % (start_date, end_date))
135143

136-
self.cursor.execute(query.as_string(self.conn) % (start_date, end_date))
137144
rows = self.cursor.fetchall()
138145

139146
id_list = []
@@ -262,7 +269,7 @@ def store_training_data(self, result):
262269
self.s3.store_json_content(content=training_data, bucket_name=bucket_name,
263270
obj_key=obj_key)
264271

265-
def normalize_worker_data(self, start_date, end_date, stack_data, worker, frequency='weekly'):
272+
def normalize_worker_data(self, start_date, end_date, stack_data, worker, frequency='daily'):
266273
"""Normalize worker data for reporting."""
267274
total_stack_requests = {'all': 0, 'npm': 0, 'maven': 0}
268275
if frequency == 'monthly':
@@ -353,13 +360,19 @@ def normalize_worker_data(self, start_date, end_date, stack_data, worker, freque
353360
'maven': self.populate_key_count(stacks_list['maven'])
354361
}
355362

356-
# Collate Data from Previous Month for Model Retraining
357-
collated_data = self.collate_raw_data(unique_stacks_with_recurrence_count, frequency)
358-
359-
# Store ecosystem specific data to their respective Training Buckets
360-
if frequency == 'weekly':
363+
today = dt.today()
364+
# Invoke this every Monday. In Python, Monday is 0 and Sunday is 6
365+
if today.weekday() == 0:
366+
# Collate Data from Previous Month for Model Retraining
367+
collated_data = self.collate_raw_data(unique_stacks_with_recurrence_count,
368+
'weekly')
369+
# Store ecosystem specific data to their respective Training Buckets
361370
self.store_training_data(collated_data)
362371

372+
# Monthly data collection on the 1st of every month
373+
if today.date == 1:
374+
self.collate_raw_data(unique_stacks_with_recurrence_count, 'monthly')
375+
363376
unique_stacks_with_deps_count =\
364377
self.set_unique_stack_deps_count(unique_stacks_with_recurrence_count)
365378

@@ -420,7 +433,7 @@ def normalize_worker_data(self, start_date, end_date, stack_data, worker, freque
420433
return None
421434

422435
def retrieve_worker_results(self, start_date, end_date, id_list=[], worker_list=[],
423-
frequency='weekly'):
436+
frequency='daily'):
424437
"""Retrieve results for selected worker from RDB."""
425438
result = {}
426439
# convert the elements of the id_list to sql.Literal
@@ -442,7 +455,7 @@ def retrieve_worker_results(self, start_date, end_date, id_list=[], worker_list=
442455
frequency)
443456
return result
444457

445-
def get_report(self, start_date, end_date, frequency='weekly'):
458+
def get_report(self, start_date, end_date, frequency='daily'):
446459
"""Generate the stacks report."""
447460
ids = self.retrieve_stack_analyses_ids(start_date, end_date)
448461
if len(ids) > 0:

openshift/template.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ parameters:
104104
displayName: Schedule
105105
required: true
106106
name: CRON_SCHEDULE
107-
value: "0 0 * * 0"
107+
value: "0 0 * * *"
108108

109109
- description: CPU request
110110
displayName: CPU request

0 commit comments

Comments
 (0)