2020import squarify
2121
2222
23- def __get_general_modality_plot (df ):
23+ def __create_general_modality_plot (df ):
24+ """
25+ Create a bar plot to visualize the frequency of general modalities.
26+
27+ This function takes a pandas DataFrame as input and generates a bar plot to display the frequency
28+ of different general modalities in the 'generalmodality' column of the DataFrame.
29+
30+ Parameters:
31+ df (pandas.DataFrame): The input DataFrame containing the 'generalmodality' column.
32+
33+ Returns:
34+ None: The function generates a bar plot but does not return any value. The plot is saved as an
35+ image file with a filename in the format 'general-modality-YYYYMMDD.png', where 'YYYYMMDD'
36+ represents the current date in year-month-day format.
37+ """
38+
2439 modality_counts = df ["generalmodality" ].value_counts ()
2540
2641 plt .figure (figsize = (10 , 6 ))
@@ -41,10 +56,20 @@ def __get_general_modality_plot(df):
4156
4257def get_random_sample (df ):
4358 """
44- Returns a random sample from the dataframe from a dataset with non-zero score .
59+ Retrieve a random JSON file from the DataFrame .
4560
46- Input: dataframe
47- Output:open the json file that was located in datasets Brain Image Library dataframe
61+ This function takes a pandas DataFrame as input and filters it to keep only the rows that have
62+ a non-zero 'score' value. From the filtered rows, it selects a random row using the 'random.randint()'
63+ function. It then generates a valid link to the JSON file by replacing '/bil/data' with
64+ 'https://download.brainimagelibrary.org' in the 'json_file' column of the selected row. The function
65+ performs an HTTP GET request to download the JSON file from the generated link, and it returns the
66+ JSON data as a Python dictionary.
67+
68+ Parameters:
69+ df (pandas.DataFrame): The input DataFrame containing the 'score' and 'json_file' columns.
70+
71+ Returns:
72+ dict: A Python dictionary containing the JSON data retrieved from a random row's JSON file.
4873 """
4974
5075 isNotZero = df [df ["score" ] != 0.0 ] # only have files with the correct data
@@ -70,11 +95,23 @@ def __get_lable_dict(name_lst):
7095 }
7196
7297
73- def __get_general_modality_treemap (df ):
98+ def __create_general_modality_treemap (df ):
7499 """
75- input: dataframe
76- output: tree map that displays the frequencies of "generalmodality" that occur in dataframe
100+ Create a treemap visualization for the general modality data.
101+
102+ This function takes a pandas DataFrame as input and generates a treemap visualization based on
103+ the counts of different modalities in the 'generalmodality' column of the DataFrame. The function
104+ utilizes the Squarify library to create the treemap.
105+
106+ Parameters:
107+ df (pandas.DataFrame): The input DataFrame containing the 'generalmodality' column.
108+
109+ Returns:
110+ None: The function generates a treemap and saves it as an image file, but it does not return any value.
111+ The treemap is saved with a filename in the format 'treemap-general-modality-YYYYMMDD.png', where
112+ 'YYYYMMDD' represents the current date in year-month-day format.
77113 """
114+
78115 modality_counts = df ["generalmodality" ].value_counts ().to_dict ()
79116 plt .figure (figsize = (14 , 10 ))
80117 values = list (modality_counts .values ())
@@ -94,16 +131,29 @@ def __get_general_modality_treemap(df):
94131 plt .legend (
95132 legend_patches , name , loc = "upper left" , bbox_to_anchor = (1 , 1 ), fontsize = "medium"
96133 )
97- plt .show ()
134+
135+ filename = f'treemap-general-modality-{ datetime .now ().strftime ("%Y%m%d" )} .png'
136+ plt .savefig (filename )
98137
99138
100139def __get_pretty_size_statistics (df ):
101140 """
102- Pretty version of __get_size_statistics
141+ Get human-readable size statistics from the DataFrame.
103142
104- Input: dataframe
105- Output: list of strings
143+ This method takes a pandas DataFrame as input and calculates size statistics using the '__get_size_statistics()'
144+ method. The statistics include the minimum, maximum, mean, and total size of the data in the DataFrame.
145+
146+ Parameters:
147+ df (pandas.DataFrame): The input DataFrame.
148+
149+ Returns:
150+ list: A list containing human-readable size statistics in the following order:
151+ - Human-readable minimum size.
152+ - Human-readable maximum size.
153+ - Human-readable mean size.
154+ - Human-readable total size.
106155 """
156+
107157 size_stats = __get_size_statistics (df )
108158
109159 return [
@@ -116,10 +166,20 @@ def __get_pretty_size_statistics(df):
116166
117167def __get_size_statistics (df ):
118168 """
119- Helper method that returns size statistics from size column .
169+ Calculate basic size statistics from the DataFrame .
120170
121- Input: dataframe
122- Output: list of numbers
171+ This method takes a pandas DataFrame as input and calculates basic size statistics, including the minimum,
172+ maximum, mean, and standard deviation of the 'size' column in the DataFrame.
173+
174+ Parameters:
175+ df (pandas.DataFrame): The input DataFrame containing the 'size' column.
176+
177+ Returns:
178+ list: A list containing the size statistics in the following order:
179+ - Minimum size.
180+ - Maximum size.
181+ - Mean size.
182+ - Standard deviation of sizes.
123183 """
124184
125185 min = df ["size" ].min ()
@@ -185,6 +245,63 @@ def get_date(df):
185245 return f"{ yr } -{ day } -{ mnt } " # format in year-day-month
186246
187247
248+ import geoip2 .database
249+ from geopy .geocoders import Nominatim
250+ import folium
251+ import math
252+ import urllib .request
253+
254+ """print(c.get_country_cities(country_code_iso="DE"))"""
255+ """
256+ Geopy: input: University #correct some data (do later) Output: Address, lat, lon
257+ folium or Ivan's
258+ Must choose module to make the map
259+ """
260+
261+
def __get_affiliations(df):
    """
    List the distinct affiliations present in the DataFrame.

    Parameters:
    df (pandas.DataFrame): The input DataFrame containing the 'affiliation' column.

    Returns:
    pandas.Index: The distinct values of the 'affiliation' column, in the order
    produced by 'value_counts()'.
    """

    affiliation_counts = df["affiliation"].value_counts()
    # The index of the counts Series holds the affiliation names themselves.
    return affiliation_counts.index
264+
265+
def __get_coordin(university):
    """
    Look up the coordinates of an affiliation by name.

    The name is sent as a query to the Nominatim geocoder (through geopy). The
    lookup is best-effort: any failure is reported on stdout and mapped to the
    (0, 0) placeholder instead of being raised.

    Parameters:
    university: The affiliation name, passed to the geocoder as the query.

    Returns:
    tuple: (latitude, longitude) of the match returned by the geocoder, or (0, 0)
    when the geocoder finds nothing or the lookup fails.
    """

    geolocator = Nominatim(user_agent="my_geocoding_app")
    try:
        # Only the network call is guarded, so unrelated errors are not hidden.
        location = geolocator.geocode(university)
    except Exception:
        # 'Exception' rather than a bare 'except' so that Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop a long geocoding run.
        print(f"Geocoding service is unavailable for {university}")
        return 0, 0

    if location:
        return location.latitude, location.longitude
    return 0, 0
277+
278+
def get_zero_coords(affiliation_coordinates):
    """
    Count the affiliations whose coordinates are the (0, 0) placeholder.

    Every affiliation mapped to (0, 0) is printed so it can be inspected.

    Parameters:
    affiliation_coordinates (dict): Maps an affiliation name to its coordinate pair.

    Returns:
    tuple: A tuple in the following order:
    - Number of affiliations mapped to (0, 0).
    - Total number of affiliations.
    - Share of (0, 0) entries, rounded up to two decimal places (0.0 for an
      empty mapping).
    """

    total = len(affiliation_coordinates)
    zeros = 0
    for university, coordinates in affiliation_coordinates.items():
        if coordinates == (0, 0):
            zeros += 1
            print(coordinates, university, "ain't working")

    # An empty mapping has no meaningful ratio; avoid dividing by zero.
    if total == 0:
        return 0, 0, 0.0

    return zeros, total, math.ceil(zeros / total * 100) / 100
288+
289+
def __get_affilation_coordinates(df):
    """
    Build a mapping from each affiliation to its coordinates.

    Each affiliation returned by '__get_affiliations()' is looked up with
    '__get_coordin()'. Entries whose latitude or longitude is None are left out.

    Parameters:
    df (pandas.DataFrame): The input DataFrame containing the 'affiliation' column.

    Returns:
    dict: Maps each affiliation name to a (latitude, longitude) tuple.
    """

    coordinates_by_affiliation = {}
    for name in __get_affiliations(df):
        lat, lon = __get_coordin(name)
        if lat is None or lon is None:
            continue
        coordinates_by_affiliation[name] = (lat, lon)
    return coordinates_by_affiliation
297+
298+
# Script entry point: look up coordinates for every affiliation, print the mapping,
# then print the (zeros, total, ratio) summary from get_zero_coords().
# NOTE(review): `df` is not assigned anywhere above this point in the visible code;
# confirm it is defined before this guard runs, otherwise this raises NameError.
299+ if __name__ == "__main__" :
300+ affiliation_coordinates = __get_affilation_coordinates (df )
301+ print (affiliation_coordinates )
302+ print (get_zero_coords (affiliation_coordinates ))
303+
304+
188305def today ():
189306 """
190307 Get today's snapshot of Brain Image Library.
@@ -981,3 +1098,34 @@ def report():
9811098 get_projects_treemap (df )
9821099
9831100 return report
1101+
1102+
1103+ import pandas as pd
1104+ import urllib .request
1105+ import geoip2 .database
1106+ from geopy .geocoders import Nominatim
1107+ import folium
1108+
1109+ """
1110+ Import the modules used to download the daily report, look up affiliation coordinates, and draw the world map.
1111+ """
1112+
# Download today's inventory report to a temporary file and load it into a DataFrame.
# NOTE(review): these statements appear to sit at module level; if so they run on
# import and need network access. Confirm that is intended.
1113+ url = "https://download.brainimagelibrary.org/inventory/daily/reports/today.json"
1114+ file_path , _ = urllib .request .urlretrieve (url )
1115+ df = pd .read_json (file_path )
# Bare expression: it has no effect outside an interactive session.
1116+ df
1117+ """
1118+ Geopy - Input: University Output: Address, lat, lon
1119+ Folium - visual map creator
1120+ """
1121+
# NOTE(review): `map` shadows the Python builtin of the same name.
1122+ map = folium .Map ()
1123+
1124+ from tqdm import tqdm
1125+
# Add one marker per row, wrapped in tqdm for a progress bar.
# Assumes the report has 'city', 'lat' and 'lng' columns; TODO confirm against today.json.
1126+ for index , row in tqdm (df .iterrows ()):
1127+ city = row ["city" ]
1128+ lat = row ["lat" ]
1129+ lon = row ["lng" ]
1130+ folium .Marker ([lat , lon ], popup = city ).add_to (map )
# NOTE(review): the original indentation is not visible here, so it is unclear whether
# the map is saved once after the loop or on every iteration. Confirm.
1131+ map .save ("project_map.html" )
0 commit comments