55from mapswipe_workers .utils import geojson_functions
66
77
def get_overall_stats(projects_df: pd.DataFrame, filename: str) -> pd.DataFrame:
    """
    Aggregate the per-project statistics by project status and save them as csv.

    The projects are grouped by their ``status`` attribute (e.g. active,
    inactive, finished) and one row of aggregated statistics is derived per
    status. The result is written to the specified csv file and also returned
    as a pandas DataFrame.

    The number of users is averaged per project and not summed up, since a
    single user can contribute to multiple projects and a sum would count
    such users several times.

    Parameters
    ----------
    projects_df: pd.DataFrame
        One row per project. The columns ``status``, ``project_id``,
        ``area_sqkm``, ``number_of_results``, ``number_of_results_progress``
        and ``number_of_users`` are read.
    filename: str
        Path of the csv file the aggregated statistics are written to.

    Returns
    -------
    pd.DataFrame
        Indexed by ``status`` with the columns ``count_projects``,
        ``area_sqkm``, ``number_of_results``, ``number_of_results_progress``
        and ``average_number_of_users_per_project``.
    """

    overall_stats_df = projects_df.groupby(["status"]).agg(
        count_projects=pd.NamedAgg(column="project_id", aggfunc="count"),
        area_sqkm=pd.NamedAgg(column="area_sqkm", aggfunc="sum"),
        number_of_results=pd.NamedAgg(column="number_of_results", aggfunc="sum"),
        number_of_results_progress=pd.NamedAgg(
            column="number_of_results_progress", aggfunc="sum"
        ),
        # mean instead of sum: users can be active in several projects
        average_number_of_users_per_project=pd.NamedAgg(
            column="number_of_users", aggfunc="mean"
        ),
    )

    # the group key becomes the index, so it is labelled explicitly in the csv
    overall_stats_df.to_csv(filename, index_label="status")
    logger.info(f"saved overall stats to {filename}")

    return overall_stats_df
5633
5734
5835def get_project_static_info (filename : str ) -> pd .DataFrame :
@@ -121,7 +98,7 @@ def load_project_info_dynamic(filename: str) -> pd.DataFrame:
12198 return df
12299
123100
124- def save_projects (filename : str , df : pd .DataFrame , df_dynamic : pd .DataFrame ) -> None :
101+ def save_projects (filename : str , df : pd .DataFrame , df_dynamic : pd .DataFrame ) -> pd . DataFrame :
125102 """
126103 The function merges the dataframes for static and dynamic project information
127104 and then save the result as csv file.
@@ -143,3 +120,5 @@ def save_projects(filename: str, df: pd.DataFrame, df_dynamic: pd.DataFrame) ->
143120 logger .info (f"saved projects: { filename } " )
144121 geojson_functions .csv_to_geojson (filename , "geom" )
145122 geojson_functions .csv_to_geojson (filename , "centroid" )
123+
124+ return projects_df
0 commit comments