55from mapswipe_workers .utils import geojson_functions
66
77
8- def get_project_static_info (filename ):
def get_overall_stats(projects_df: pd.DataFrame, filename: str) -> pd.DataFrame:
    """
    Aggregate the per-project statistics by project status and save them as csv.

    The projects are grouped by their ``status`` attribute, so the result holds
    one row per distinct status value.
    The number of users is averaged per project and deliberately not summed up:
    a single user can contribute to multiple projects, hence a sum would
    count such users several times.

    Parameters
    ----------
    projects_df: pd.DataFrame
        One row per project. The columns ``status``, ``project_id``,
        ``area_sqkm``, ``number_of_results``, ``number_of_results_progress``
        and ``number_of_users`` are read.
    filename: str
        Path of the csv file the aggregated statistics are written to.

    Returns
    -------
    pd.DataFrame
        The aggregated statistics, indexed by ``status``.
    """

    # output column name -> (input column, aggregation function)
    aggregations = {
        "count_projects": pd.NamedAgg(column="project_id", aggfunc="count"),
        "area_sqkm": pd.NamedAgg(column="area_sqkm", aggfunc="sum"),
        "number_of_results": pd.NamedAgg(
            column="number_of_results", aggfunc="sum"
        ),
        "number_of_results_progress": pd.NamedAgg(
            column="number_of_results_progress", aggfunc="sum"
        ),
        "average_number_of_users_per_project": pd.NamedAgg(
            column="number_of_users", aggfunc="mean"
        ),
    }

    grouped_by_status = projects_df.groupby(["status"])
    overall_stats_df = grouped_by_status.agg(**aggregations)

    overall_stats_df.to_csv(filename, index_label="status")
    logger.info(f"saved overall stats to {filename}")

    return overall_stats_df
33+
34+
35+ def get_project_static_info (filename : str ) -> pd .DataFrame :
36+ """
37+ The function queries the projects table.
38+ Each row represents a single project and provides the information which is static.
39+ By static we understand all attributes which are not affected by new results being contributed.
40+ The results are stored in a csv file and also returned as a pandas DataFrame.
41+
42+ Parameters
43+ ----------
44+ filename: str
45+ """
946
1047 pg_db = auth .postgresDB ()
48+
49+ # make sure to replace newline characters here
1150 sql_query = """
1251 COPY (
1352 SELECT
1453 project_id
15- ,name
16- ,project_details
17- ,look_for
54+ ,regexp_replace(name, E'[ \\ n \\ r]+', ' ', 'g' ) as name
55+ ,regexp_replace(project_details, E'[ \\ n \\ r]+', ' ', 'g' ) as project_details
56+ ,regexp_replace(look_for, E'[ \\ n \\ r]+', ' ', 'g' ) as look_for
1857 ,project_type
19- ,status
58+ ,regexp_replace(status, E'[ \\ n \\ r]+', ' ', 'g' ) as status
2059 ,ST_Area(geom::geography)/1000000 as area_sqkm
2160 ,ST_AsText(geom) as geom
2261 ,ST_AsText(ST_Centroid(geom)) as centroid
@@ -30,10 +69,20 @@ def get_project_static_info(filename):
3069 logger .info ("got projects from postgres." )
3170
3271 df = pd .read_csv (filename )
72+
3373 return df
3474
3575
36- def load_project_info_dynamic (filename ):
76+ def load_project_info_dynamic (filename : str ) -> pd .DataFrame :
77+ """
78+ The function loads data from a csv file into a pandas dataframe.
79+ If no file exists, it will be initialized.
80+
81+ Parameters
82+ ----------
83+ filename: str
84+ """
85+
3786 if os .path .isfile (filename ):
3887 logger .info (f"file { filename } exists. Init from this file." )
3988 df = pd .read_csv (filename , index_col = "idx" )
@@ -52,11 +101,27 @@ def load_project_info_dynamic(filename):
52101 return df
53102
54103
def save_projects(filename: str, df: pd.DataFrame, df_dynamic: pd.DataFrame) -> pd.DataFrame:
    """
    Merge the dataframes for static and dynamic project information
    and save the result as a csv file.

    Additionally, two geojson files are generated from the written csv file using
    (a) the geometry of the projects ("geom") and
    (b) the centroid of the projects ("centroid").

    Parameters
    ----------
    filename: str
        Path of the csv file the merged projects are written to.
    df: pd.DataFrame
        Static project information, joined on ``project_id``.
    df_dynamic: pd.DataFrame
        Dynamic project information, joined on ``project_id``.

    Returns
    -------
    pd.DataFrame
        The merged dataframe that has been written to ``filename``.
    """

    # left join: every row of the static dataframe is kept, even if there is
    # no matching dynamic information for the project
    projects_df = df.merge(
        df_dynamic, left_on="project_id", right_on="project_id", how="left"
    )

    # Always terminate rows with "\n", independent of the platform default.
    # pandas 1.5 renamed the keyword `line_terminator` to `lineterminator` and
    # pandas 2.0 removed the old spelling, so try the current name first and
    # fall back to the old one for pandas versions that do not know it yet.
    try:
        projects_df.to_csv(filename, index_label="idx", lineterminator="\n")
    except TypeError:
        projects_df.to_csv(filename, index_label="idx", line_terminator="\n")
    logger.info(f"saved projects: {filename}")

    geojson_functions.csv_to_geojson(filename, "geom")
    geojson_functions.csv_to_geojson(filename, "centroid")

    return projects_df
0 commit comments