Skip to content

Commit 00bcd2d

Browse files
authored
Merge pull request #71 from brain-image-library/48-new-metric-request-map-for-projects-+-affiliations
48 new metric request map for projects + affiliations
2 parents 49939d2 + aa773c1 commit 00bcd2d

File tree

1 file changed

+162
-14
lines changed

1 file changed

+162
-14
lines changed

braininventory/get.py

Lines changed: 162 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,22 @@
2020
import squarify
2121

2222

23-
def __get_general_modality_plot(df):
23+
def __create_general_modality_plot(df):
24+
"""
25+
Create a bar plot to visualize the frequency of general modalities.
26+
27+
This function takes a pandas DataFrame as input and generates a bar plot to display the frequency
28+
of different general modalities in the 'generalmodality' column of the DataFrame.
29+
30+
Parameters:
31+
df (pandas.DataFrame): The input DataFrame containing the 'generalmodality' column.
32+
33+
Returns:
34+
None: The function generates a bar plot but does not return any value. The plot is saved as an
35+
image file with a filename in the format 'general-modality-YYYYMMDD.png', where 'YYYYMMDD'
36+
represents the current date in year-month-day format.
37+
"""
38+
2439
modality_counts = df["generalmodality"].value_counts()
2540

2641
plt.figure(figsize=(10, 6))
@@ -41,10 +56,20 @@ def __get_general_modality_plot(df):
4156

4257
def get_random_sample(df):
4358
"""
44-
Returns a random sample from the dataframe from a dataset with non-zero score.
59+
Retrieve a random JSON file from the DataFrame.
4560
46-
Input: dataframe
47-
Output:open the json file that was located in datasets Brain Image Library dataframe
61+
This function takes a pandas DataFrame as input and filters it to keep only the rows that have
62+
a non-zero 'score' value. From the filtered rows, it selects a random row using the 'random.randint()'
63+
function. It then generates a valid link to the JSON file by replacing '/bil/data' with
64+
'https://download.brainimagelibrary.org' in the 'json_file' column of the selected row. The function
65+
performs an HTTP GET request to download the JSON file from the generated link, and it returns the
66+
JSON data as a Python dictionary.
67+
68+
Parameters:
69+
df (pandas.DataFrame): The input DataFrame containing the 'score' and 'json_file' columns.
70+
71+
Returns:
72+
dict: A Python dictionary containing the JSON data retrieved from a random row's JSON file.
4873
"""
4974

5075
isNotZero = df[df["score"] != 0.0] # only have files with the correct data
@@ -70,11 +95,23 @@ def __get_lable_dict(name_lst):
7095
}
7196

7297

73-
def __get_general_modality_treemap(df):
98+
def __create_general_modality_treemap(df):
7499
"""
75-
input: dataframe
76-
output: tree map that displays the frequencies of "generalmodality" that occur in dataframe
100+
Create a treemap visualization for the general modality data.
101+
102+
This function takes a pandas DataFrame as input and generates a treemap visualization based on
103+
the counts of different modalities in the 'generalmodality' column of the DataFrame. The function
104+
utilizes the Squarify library to create the treemap.
105+
106+
Parameters:
107+
df (pandas.DataFrame): The input DataFrame containing the 'generalmodality' column.
108+
109+
Returns:
110+
None: The function generates a treemap and saves it as an image file, but it does not return any value.
111+
The treemap is saved with a filename in the format 'treemap-general-modality-YYYYMMDD.png', where
112+
'YYYYMMDD' represents the current date in year-month-day format.
77113
"""
114+
78115
modality_counts = df["generalmodality"].value_counts().to_dict()
79116
plt.figure(figsize=(14, 10))
80117
values = list(modality_counts.values())
@@ -94,16 +131,29 @@ def __get_general_modality_treemap(df):
94131
plt.legend(
95132
legend_patches, name, loc="upper left", bbox_to_anchor=(1, 1), fontsize="medium"
96133
)
97-
plt.show()
134+
135+
filename = f'treemap-general-modality-{datetime.now().strftime("%Y%m%d")}.png'
136+
plt.savefig(filename)
98137

99138

100139
def __get_pretty_size_statistics(df):
101140
"""
102-
Pretty version of __get_size_statistics
141+
Get human-readable size statistics from the DataFrame.
103142
104-
Input: dataframe
105-
Output: list of strings
143+
This method takes a pandas DataFrame as input and calculates size statistics using the '__get_size_statistics()'
144+
method. The statistics include the minimum, maximum, mean, and total size of the data in the DataFrame.
145+
146+
Parameters:
147+
df (pandas.DataFrame): The input DataFrame.
148+
149+
Returns:
150+
list: A list containing human-readable size statistics in the following order:
151+
- Human-readable minimum size.
152+
- Human-readable maximum size.
153+
- Human-readable mean size.
154+
- Human-readable total size.
106155
"""
156+
107157
size_stats = __get_size_statistics(df)
108158

109159
return [
@@ -116,10 +166,20 @@ def __get_pretty_size_statistics(df):
116166

117167
def __get_size_statistics(df):
118168
"""
119-
Helper method that returns size statistics from size column.
169+
Calculate basic size statistics from the DataFrame.
120170
121-
Input: dataframe
122-
Output: list of numbers
171+
This method takes a pandas DataFrame as input and calculates basic size statistics, including the minimum,
172+
maximum, mean, and standard deviation of the 'size' column in the DataFrame.
173+
174+
Parameters:
175+
df (pandas.DataFrame): The input DataFrame containing the 'size' column.
176+
177+
Returns:
178+
list: A list containing the size statistics in the following order:
179+
- Minimum size.
180+
- Maximum size.
181+
- Mean size.
182+
- Standard deviation of sizes.
123183
"""
124184

125185
min = df["size"].min()
@@ -185,6 +245,63 @@ def get_date(df):
185245
return f"{yr}-{day}-{mnt}" # format in year-day-month
186246

187247

248+
import geoip2.database
249+
from geopy.geocoders import Nominatim
250+
import folium
251+
import math
252+
import urllib.request
253+
254+
"""print(c.get_country_cities(country_code_iso="DE"))"""
255+
"""
256+
Geopy - Input: university name (some entries still need correcting; do later). Output: address, lat, lon
257+
folium or Ivan's
258+
Must choose module to make the map
259+
"""
260+
261+
262+
def __get_affiliations(df):
    """
    List the distinct affiliations present in the DataFrame.

    Parameters:
        df (pandas.DataFrame): The input DataFrame containing the 'affiliation' column.

    Returns:
        pandas.Index: The distinct values of the 'affiliation' column, taken from the
        keys of that column's value counts.
    """
    affiliation_counts = df["affiliation"].value_counts()
    return affiliation_counts.keys()
264+
265+
266+
def __get_coordin(university):
    """
    Look up the latitude and longitude of an affiliation with Nominatim.

    Parameters:
        university (str): The name to geocode.

    Returns:
        tuple: (latitude, longitude) of the geocoding result, or (0, 0) when the
        geocoder returns no result or the lookup raises an error.
    """
    geolocator = Nominatim(user_agent="my_geocoding_app")
    try:
        location = geolocator.geocode(university)
    except Exception:
        # Best-effort lookup: any geocoding failure maps to the (0, 0) placeholder.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        print(f"Geocoding service is unavailable for {university}")
        return 0, 0

    if location:
        return location.latitude, location.longitude
    return (0, 0)
277+
278+
279+
def get_zero_coords(affiliation_coordinates):
    """
    Count the affiliations whose coordinates are the (0, 0) placeholder.

    Each affiliation mapped to (0, 0) is printed so it can be inspected.

    Parameters:
        affiliation_coordinates (dict): Mapping of affiliation name to a
            (latitude, longitude) tuple.

    Returns:
        tuple: (zeros, total, ratio), where 'zeros' is the number of entries equal
        to (0, 0), 'total' is the number of entries, and 'ratio' is zeros / total
        rounded up to two decimal places. For an empty mapping the result is
        (0, 0, 0.0) instead of a ZeroDivisionError.
    """
    total = len(affiliation_coordinates)
    zeros = 0
    for university, coordinates in affiliation_coordinates.items():
        if coordinates == (0, 0):
            zeros += 1
            print(coordinates, university, "ain't working")

    if total == 0:
        # Nothing to report on; avoid dividing by zero.
        return 0, 0, 0.0
    return zeros, total, math.ceil(zeros / total * 100) / 100
288+
289+
290+
def __get_affilation_coordinates(df):
    """
    Build a mapping from each affiliation to its geocoded coordinates.

    Parameters:
        df (pandas.DataFrame): The input DataFrame, passed to '__get_affiliations()'.

    Returns:
        dict: Affiliation name -> (latitude, longitude) as returned by
        '__get_coordin()'. Entries with a None latitude or longitude are left out.
    """
    coordinates_by_affiliation = {}
    for name in __get_affiliations(df):
        lat, lon = __get_coordin(name)
        if lat is None or lon is None:
            continue
        coordinates_by_affiliation[name] = (lat, lon)
    return coordinates_by_affiliation
297+
298+
299+
# Script entry: geocode every affiliation, print the resulting mapping, then print
# the (zeros, total, ratio) summary of lookups that produced (0, 0).
# NOTE(review): in the visible part of this file `df` is only assigned at module
# level further down, so running the file as a script would presumably raise
# NameError here - confirm where `df` is expected to come from.
if __name__ == "__main__":
    affiliation_coordinates = __get_affilation_coordinates(df)
    print(affiliation_coordinates)
    print(get_zero_coords(affiliation_coordinates))
303+
304+
188305
def today():
189306
"""
190307
Get today's snapshot of Brain Image Library.
@@ -981,3 +1098,34 @@ def report():
9811098
get_projects_treemap(df)
9821099

9831100
return report
1101+
1102+
1103+
import pandas as pd
1104+
import urllib.request
1105+
import geoip2.database
1106+
from geopy.geocoders import Nominatim
1107+
import folium
1108+
1109+
"""
1110+
Import modules that will be used to create the world map, find coordinates of affiliations, and load the daily inventory report.
1111+
"""
1112+
1113+
# Download today's inventory report and load it into the module-level DataFrame.
url = "https://download.brainimagelibrary.org/inventory/daily/reports/today.json"
file_path, _ = urllib.request.urlretrieve(url)
df = pd.read_json(file_path)

# Geopy - Input: University Output: Address, lat, lon
# Folium - visual map creator

# Named project_map rather than map so the builtin map() is not shadowed for the
# rest of the module.
project_map = folium.Map()

from tqdm import tqdm

# Add one marker per row, labelled with the row's city, then write the map once.
# NOTE(review): assumes the report has 'city', 'lat' and 'lng' columns - confirm.
for _, row in tqdm(df.iterrows(), total=len(df)):
    city = row["city"]
    lat = row["lat"]
    lon = row["lng"]
    folium.Marker([lat, lon], popup=city).add_to(project_map)
project_map.save("project_map.html")

0 commit comments

Comments
 (0)