20
20
import squarify
21
21
22
22
23
- def __get_general_modality_plot (df ):
23
+ def __create_general_modality_plot (df ):
24
+ """
25
+ Create a bar plot to visualize the frequency of general modalities.
26
+
27
+ This function takes a pandas DataFrame as input and generates a bar plot to display the frequency
28
+ of different general modalities in the 'generalmodality' column of the DataFrame.
29
+
30
+ Parameters:
31
+ df (pandas.DataFrame): The input DataFrame containing the 'generalmodality' column.
32
+
33
+ Returns:
34
+ None: The function generates a bar plot but does not return any value. The plot is saved as an
35
+ image file with a filename in the format 'general-modality-YYYYMMDD.png', where 'YYYYMMDD'
36
+ represents the current date in year-month-day format.
37
+ """
38
+
24
39
modality_counts = df ["generalmodality" ].value_counts ()
25
40
26
41
plt .figure (figsize = (10 , 6 ))
@@ -41,10 +56,20 @@ def __get_general_modality_plot(df):
41
56
42
57
def get_random_sample (df ):
43
58
"""
44
- Returns a random sample from the dataframe from a dataset with non-zero score .
59
+ Retrieve a random JSON file from the DataFrame .
45
60
46
- Input: dataframe
47
- Output:open the json file that was located in datasets Brain Image Library dataframe
61
+ This function takes a pandas DataFrame as input and filters it to keep only the rows that have
62
+ a non-zero 'score' value. From the filtered rows, it selects a random row using the 'random.randint()'
63
+ function. It then generates a valid link to the JSON file by replacing '/bil/data' with
64
+ 'https://download.brainimagelibrary.org' in the 'json_file' column of the selected row. The function
65
+ performs an HTTP GET request to download the JSON file from the generated link, and it returns the
66
+ JSON data as a Python dictionary.
67
+
68
+ Parameters:
69
+ df (pandas.DataFrame): The input DataFrame containing the 'score' and 'json_file' columns.
70
+
71
+ Returns:
72
+ dict: A Python dictionary containing the JSON data retrieved from a random row's JSON file.
48
73
"""
49
74
50
75
isNotZero = df [df ["score" ] != 0.0 ] # only have files with the correct data
@@ -70,11 +95,23 @@ def __get_lable_dict(name_lst):
70
95
}
71
96
72
97
73
- def __get_general_modality_treemap (df ):
98
+ def __create_general_modality_treemap (df ):
74
99
"""
75
- input: dataframe
76
- output: tree map that displays the frequencies of "generalmodality" that occur in dataframe
100
+ Create a treemap visualization for the general modality data.
101
+
102
+ This function takes a pandas DataFrame as input and generates a treemap visualization based on
103
+ the counts of different modalities in the 'generalmodality' column of the DataFrame. The function
104
+ utilizes the Squarify library to create the treemap.
105
+
106
+ Parameters:
107
+ df (pandas.DataFrame): The input DataFrame containing the 'generalmodality' column.
108
+
109
+ Returns:
110
+ None: The function generates a treemap and saves it as an image file, but it does not return any value.
111
+ The treemap is saved with a filename in the format 'treemap-general-modality-YYYYMMDD.png', where
112
+ 'YYYYMMDD' represents the current date in year-month-day format.
77
113
"""
114
+
78
115
modality_counts = df ["generalmodality" ].value_counts ().to_dict ()
79
116
plt .figure (figsize = (14 , 10 ))
80
117
values = list (modality_counts .values ())
@@ -94,16 +131,29 @@ def __get_general_modality_treemap(df):
94
131
plt .legend (
95
132
legend_patches , name , loc = "upper left" , bbox_to_anchor = (1 , 1 ), fontsize = "medium"
96
133
)
97
- plt .show ()
134
+
135
+ filename = f'treemap-general-modality-{ datetime .now ().strftime ("%Y%m%d" )} .png'
136
+ plt .savefig (filename )
98
137
99
138
100
139
def __get_pretty_size_statistics (df ):
101
140
"""
102
- Pretty version of __get_size_statistics
141
+ Get human-readable size statistics from the DataFrame.
103
142
104
- Input: dataframe
105
- Output: list of strings
143
+ This method takes a pandas DataFrame as input and calculates size statistics using the '__get_size_statistics()'
144
+ method. The statistics include the minimum, maximum, mean, and total size of the data in the DataFrame.
145
+
146
+ Parameters:
147
+ df (pandas.DataFrame): The input DataFrame.
148
+
149
+ Returns:
150
+ list: A list containing human-readable size statistics in the following order:
151
+ - Human-readable minimum size.
152
+ - Human-readable maximum size.
153
+ - Human-readable mean size.
154
+ - Human-readable total size.
106
155
"""
156
+
107
157
size_stats = __get_size_statistics (df )
108
158
109
159
return [
@@ -116,10 +166,20 @@ def __get_pretty_size_statistics(df):
116
166
117
167
def __get_size_statistics (df ):
118
168
"""
119
- Helper method that returns size statistics from size column .
169
+ Calculate basic size statistics from the DataFrame .
120
170
121
- Input: dataframe
122
- Output: list of numbers
171
+ This method takes a pandas DataFrame as input and calculates basic size statistics, including the minimum,
172
+ maximum, mean, and standard deviation of the 'size' column in the DataFrame.
173
+
174
+ Parameters:
175
+ df (pandas.DataFrame): The input DataFrame containing the 'size' column.
176
+
177
+ Returns:
178
+ list: A list containing the size statistics in the following order:
179
+ - Minimum size.
180
+ - Maximum size.
181
+ - Mean size.
182
+ - Standard deviation of sizes.
123
183
"""
124
184
125
185
min = df ["size" ].min ()
@@ -185,6 +245,63 @@ def get_date(df):
185
245
return f"{ yr } -{ day } -{ mnt } " # format in year-day-month
186
246
187
247
248
+ import geoip2 .database
249
+ from geopy .geocoders import Nominatim
250
+ import folium
251
+ import math
252
+ import urllib .request
253
+
254
+ """print(c.get_country_cities(country_code_iso="DE"))"""
255
+ """
256
+ Geopy - Input: university name (TODO: correct some of the data later). Output: address, lat, lon
257
+ folium or Ivan's
258
+ Must choose module to make the map
259
+ """
260
+
261
+
262
def __get_affiliations(df):
    """
    List the distinct affiliations found in the DataFrame.

    Parameters:
        df (pandas.DataFrame): The input DataFrame containing the 'affiliation' column.

    Returns:
        pandas.Index: The distinct values of the 'affiliation' column, taken from the keys of
        'value_counts()' (which orders them from most to least frequent by default).
    """
    affiliation_counts = df["affiliation"].value_counts()
    return affiliation_counts.keys()
264
+
265
+
266
def __get_coordin(university):
    """
    Look up the coordinates of an affiliation with the Nominatim geocoder.

    Parameters:
        university: The affiliation name passed to the geocoder as its query.

    Returns:
        tuple: '(latitude, longitude)' of the location returned by the geocoder, or '(0, 0)' when
        the geocoder returns no match or the lookup raises an error.
    """
    geolocator = Nominatim(user_agent="my_geocoding_app")
    try:
        location = geolocator.geocode(university)
    except Exception:
        # Best-effort lookup: any geocoding failure maps to the (0, 0) sentinel that
        # callers count as "not found". Unlike a bare 'except', this still lets
        # KeyboardInterrupt and SystemExit propagate so a long run can be stopped.
        print(f"Geocoding service is unavailable for {university}")
        return 0, 0

    if location:
        return location.latitude, location.longitude
    return 0, 0
277
+
278
+
279
def get_zero_coords(affiliation_coordinates):
    """
    Count the affiliations whose coordinates are the '(0, 0)' placeholder.

    Each affiliation mapped to '(0, 0)' is printed together with its coordinates.

    Parameters:
        affiliation_coordinates (dict): Mapping of affiliation name to a coordinate pair.

    Returns:
        tuple: '(zeros, total, ratio)' where 'zeros' is the number of entries equal to '(0, 0)',
        'total' is the number of entries, and 'ratio' is 'zeros / total' rounded up to two
        decimal places. 'ratio' is 0.0 for an empty mapping.
    """
    total = len(affiliation_coordinates)
    zeros = 0
    for university, coordinates in affiliation_coordinates.items():
        if coordinates == (0, 0):
            zeros += 1
            print(coordinates, university, "ain't working")

    # An empty mapping has no failures to report; avoid dividing by zero.
    if total == 0:
        return zeros, total, 0.0
    return zeros, total, math.ceil(zeros / total * 100) / 100
288
+
289
+
290
def __get_affilation_coordinates(df):
    """
    Build a mapping from each affiliation to its coordinates.

    Every affiliation returned by '__get_affiliations()' is looked up with '__get_coordin()'.

    Parameters:
        df (pandas.DataFrame): The input DataFrame containing the 'affiliation' column.

    Returns:
        dict: Affiliation name mapped to a '(latitude, longitude)' tuple. Entries whose latitude
        or longitude is None are left out.
    """
    coordinates_by_affiliation = {}
    for name in __get_affiliations(df):
        lat, lon = __get_coordin(name)
        if lat is None or lon is None:
            continue
        coordinates_by_affiliation[name] = (lat, lon)
    return coordinates_by_affiliation
297
+
298
+
299
if __name__ == "__main__":
    # Script entry point: geocode every affiliation, print the resulting mapping, then print
    # how many affiliations ended up with the (0, 0) placeholder.
    # NOTE(review): `df` is not assigned in this block; presumably a module-level DataFrame is
    # expected to exist before this point - confirm, otherwise this raises NameError.
    affiliation_coordinates = __get_affilation_coordinates(df)
    print(affiliation_coordinates)
    print(get_zero_coords(affiliation_coordinates))
303
+
304
+
188
305
def today ():
189
306
"""
190
307
Get today's snapshot of Brain Image Library.
@@ -981,3 +1098,34 @@ def report():
981
1098
get_projects_treemap (df )
982
1099
983
1100
return report
1101
+
1102
+
1103
+ import pandas as pd
1104
+ import urllib .request
1105
+ import geoip2 .database
1106
+ from geopy .geocoders import Nominatim
1107
+ import folium
1108
+
1109
+ """
1110
+ Import the modules used to create the world map and to find the coordinates of affiliations.
1111
+ """
1112
+
1113
# Download today's inventory report to a local file and load it into a DataFrame.
# NOTE(review): these are top-level statements, so the download happens whenever this
# code is executed, including on import - confirm that is intended.
url = "https://download.brainimagelibrary.org/inventory/daily/reports/today.json"
file_path, _ = urllib.request.urlretrieve(url)
df = pd.read_json(file_path)
# Bare expression with no effect when run as a script (presumably notebook residue).
df
1117
+ """
1118
+ Geopy - Input: University Output: Address, lat, lon
1119
+ Folium - visual map creator
1120
+ """
1121
+
1122
# Map object that the markers below are added to.
# NOTE(review): this name shadows the builtin `map` for the rest of the module.
map = folium.Map()

from tqdm import tqdm

# Add one marker per DataFrame row, placed at the row's 'lat'/'lng' values with the row's
# 'city' as the popup text; tqdm shows progress over the rows.
for index, row in tqdm(df.iterrows()):
    city = row["city"]
    lat = row["lat"]
    lon = row["lng"]
    folium.Marker([lat, lon], popup=city).add_to(map)
# NOTE(review): the original indentation is not recoverable from this view; saving once
# after the loop is assumed - confirm.
map.save("project_map.html")
0 commit comments