@@ -38,14 +38,18 @@ def get_results_by_project_id(filename, project_id):
3838 logger .info (f'got results from postgres for { project_id } ' )
3939
4040 df = pd .read_csv (filename )
41- df ['group_id' ] = df .apply (lambda row : id_to_string (row ['group_id' ]), axis = 1 )
42- df ['group_id' ] = df ['group_id' ].astype (str )
43- df ['timestamp' ] = pd .to_datetime (df ['timestamp' ])
44- df ['day' ] = df ['timestamp' ].apply (
45- lambda df : datetime .datetime (year = df .year , month = df .month , day = df .day ))
4641
47- logger .info (f'created pandas results df for { project_id } ' )
48- return df
42+ if len (df ) > 0 :
43+ df ['group_id' ] = df .apply (lambda row : id_to_string (row ['group_id' ]), axis = 1 )
44+ df ['group_id' ] = df ['group_id' ].astype (str )
45+ df ['timestamp' ] = pd .to_datetime (df ['timestamp' ])
46+ df ['day' ] = df ['timestamp' ].apply (
47+ lambda df : datetime .datetime (year = df .year , month = df .month , day = df .day ))
48+ logger .info (f'created pandas results df for { project_id } ' )
49+ return df
50+ else :
51+ logger .info (f'there are no results for this project { project_id } ' )
52+ return None
4953
5054
5155def get_tasks_by_project_id (filename , project_id ):
@@ -246,34 +250,40 @@ def get_per_project_statistics(project_id):
246250
247251 # load data from postgres or local storage if already downloaded
248252 results_df = get_results_by_project_id (results_filename , project_id )
249- groups_df = get_groups_by_project_id (groups_filename , project_id )
250- tasks_df = get_tasks_by_project_id (tasks_filename , project_id )
251-
252- # aggregate results by task id
253- agg_results_df = agg_results_by_task_id (results_df , tasks_df )
254- agg_results_df .to_csv (agg_results_filename , index_label = 'idx' )
255- logger .info (f'saved agg results for { project_id } : { agg_results_filename } ' )
256- geojson_functions .csv_to_geojson (agg_results_filename , 'geom' )
257-
258- # calculate progress by date
259- progress_by_date_df = get_progress_by_date (results_df , groups_df )
260-
261- # calculate contributors by date
262- contributors_by_date_df = get_contributors_by_date (results_df )
263-
264- # merge contributors and progress
265- project_stats_by_date_df = progress_by_date_df .merge (contributors_by_date_df , left_on = 'day' , right_on = 'day' )
266- project_stats_by_date_df ['project_id' ] = project_id
267- project_stats_by_date_df .to_csv (project_stats_by_date_filename )
268- logger .info (f'saved project stats by date for { project_id } : { project_stats_by_date_filename } ' )
269-
270- project_stats_dict = {
271- 'project_id' : project_id ,
272- 'progress' : project_stats_by_date_df ['cum_progress' ].iloc [- 1 ],
273- 'number_of_users' : project_stats_by_date_df ['cum_number_of_users' ].iloc [- 1 ],
274- 'number_of_results' : project_stats_by_date_df ['cum_number_of_results' ].iloc [- 1 ],
275- 'number_of_results_progress' : project_stats_by_date_df ['cum_number_of_results_progress' ].iloc [- 1 ],
276- 'day' : project_stats_by_date_df .index [- 1 ]
277- }
278-
279- return project_stats_dict
253+
254+ if results_df is None :
255+ logger .info (f'no results: skipping per project stats for { project_id } ' )
256+ return None
257+ else :
258+ groups_df = get_groups_by_project_id (groups_filename , project_id )
259+ tasks_df = get_tasks_by_project_id (tasks_filename , project_id )
260+
261+ # aggregate results by task id
262+ agg_results_df = agg_results_by_task_id (results_df , tasks_df )
263+ agg_results_df .to_csv (agg_results_filename , index_label = 'idx' )
264+ logger .info (f'saved agg results for { project_id } : { agg_results_filename } ' )
265+ geojson_functions .csv_to_geojson (agg_results_filename , 'geom' )
266+
267+ # calculate progress by date
268+ progress_by_date_df = get_progress_by_date (results_df , groups_df )
269+
270+ # calculate contributors by date
271+ contributors_by_date_df = get_contributors_by_date (results_df )
272+
273+ # merge contributors and progress
274+ project_stats_by_date_df = progress_by_date_df .merge (contributors_by_date_df , left_on = 'day' , right_on = 'day' )
275+ project_stats_by_date_df ['project_id' ] = project_id
276+ project_stats_by_date_df .to_csv (project_stats_by_date_filename )
277+ logger .info (f'saved project stats by date for { project_id } : { project_stats_by_date_filename } ' )
278+
279+ project_stats_dict = {
280+ 'project_id' : project_id ,
281+ 'progress' : project_stats_by_date_df ['cum_progress' ].iloc [- 1 ],
282+ 'number_of_users' : project_stats_by_date_df ['cum_number_of_users' ].iloc [- 1 ],
283+ 'number_of_results' : project_stats_by_date_df ['cum_number_of_results' ].iloc [- 1 ],
284+ 'number_of_results_progress' : project_stats_by_date_df ['cum_number_of_results_progress' ].iloc [- 1 ],
285+ 'day' : project_stats_by_date_df .index [- 1 ]
286+ }
287+
288+ return project_stats_dict
289+
0 commit comments