Skip to content

Commit e5d18f3

Browse files
committed
# Updated code to import and use the pandarallel library for parallel processing.
# Created a new function to check if a URL is reachable.
# Created a new function to compute the percentage of datasets that are reachable.
# Updated function names for better clarity.
# Added new functions to extract and count various metadata values from the dataset.
# Added new keys to the report dictionary to include the extracted metadata values.
1 parent 3a69585 commit e5d18f3

File tree

1 file changed

+74
-24
lines changed

1 file changed

+74
-24
lines changed

braininventory/get.py

Lines changed: 74 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,15 @@
22
import pandas as pd
33
import json
44
from datetime import date
5+
from pandarallel import pandarallel
6+
7+
# Configure pandarallel at import time with 8 workers and a progress bar;
# `parallel_apply` is used further down for the per-URL reachability checks.
pandarallel.initialize(nb_workers=8, progress_bar=True)
8+
59

610
def today():
711
"""
8-
Get today's snapshot of Brain Image Library.
9-
"""
12+
Get today's snapshot of Brain Image Library.
13+
"""
1014

1115
server = "https://download.brainimagelibrary.org/inventory/daily/reports/"
1216
filename = "today.json"
@@ -23,38 +27,84 @@ def today():
2327
else:
2428
print("Error: Failed to fetch JSON data")
2529
return pd.DataFrame()
26-
30+
31+
2732
def __get_number_of_datasets(df):
    """
    Return the number of rows in `df` (one row per dataset).
    """
    return len(df)
34+
2935

3036
def __get_completeness_score(df):
    """
    Return the average of the `score` column over all rows of `df`.

    Returns 0.0 when there are no rows, instead of dividing by zero.
    A missing `score` column still raises KeyError.
    """
    scores = df["score"]

    # Guard the division below: an empty snapshot has no meaningful average.
    if scores.empty:
        return 0.0

    return scores.sum() / len(df)
38+
3239

33-
def __is_reachable(url):
34-
response = requests.get(url)
40+
def __is_it_reachable(url):
    """
    Check whether `url` answers an HTTP GET request with status code 200.

    Returns True only for a 200 response. Any request failure (connection
    error, invalid URL, timeout, ...) is reported as False rather than raised,
    so a single bad URL cannot abort a whole batch of checks.
    """
    try:
        # Without an explicit timeout, requests can wait indefinitely on a
        # host that accepts the connection but never responds.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return False

    return response.status_code == 200
47+
48+
49+
def __are_reachable(df):
    """
    Return the fraction of rows in `df` whose `URL` is reachable.

    Each URL is checked with `__is_it_reachable` through pandarallel's
    `parallel_apply`. As a side effect, the per-row results are stored in a
    new `is_reachable` column on `df`.
    """
    print("Computing what datasets are reachable")
    df["is_reachable"] = df["URL"].parallel_apply(__is_it_reachable)

    # Summing booleans counts the True values.
    return df["is_reachable"].sum() / len(df)
3553

36-
if response.status_code == 200:
37-
return True
38-
else:
39-
return False
4054

4155
def __get_metadata_version(df):
    """
    Return a dict mapping each `metadata_version` value to its row count.
    """
    counts = df["metadata_version"].value_counts()
    return counts.to_dict()
57+
58+
59+
def __get_genotypes(df):
    """
    Return a dict mapping each `genotype` value to its row count.
    """
    counts = df["genotype"].value_counts()
    return counts.to_dict()
61+
62+
63+
def __get_modalities(df):
    """
    Return a dict mapping each `generalmodality` value to its row count.
    """
    counts = df["generalmodality"].value_counts()
    return counts.to_dict()
65+
66+
67+
def __get_techniques(df):
    """
    Return a dict mapping each `technique` value to its row count.
    """
    counts = df["technique"].value_counts()
    return counts.to_dict()
69+
70+
71+
def __get_award_numbers(df):
    """
    Return a dict mapping each `award_number` value to its row count.
    """
    counts = df["award_number"].value_counts()
    return counts.to_dict()
73+
74+
75+
def __get_affiliations(df):
    """
    Return a dict mapping each `affiliation` value to its row count.
    """
    counts = df["affiliation"].value_counts()
    return counts.to_dict()
77+
78+
79+
def __get_contributors(df):
    """
    Return a dict mapping each `contributorname` value to its row count.
    """
    counts = df["contributorname"].value_counts()
    return counts.to_dict()
81+
82+
83+
def __get_projects(df):
    """
    Return a dict mapping each `project` value to its row count.
    """
    counts = df["project"].value_counts()
    return counts.to_dict()
85+
4386

4487
def report():
    """
    Build a summary report from today's snapshot.

    Fetches the snapshot with `today()` and returns a dictionary holding the
    report date (formatted as YYYY-MM-DD), the number of datasets, the
    completeness score, and value counts for several metadata columns.
    """
    # Get today's date and convert it to a string
    tdate = date.today().strftime("%Y-%m-%d")

    df = today()

    techniques = __get_techniques(df)

    # Named `summary` so the local does not shadow this function's own name.
    # NOTE: the reachability ratio (`__are_reachable`) is currently left out
    # of the report.
    summary = {
        "date": tdate,
        "number_of_datasets": __get_number_of_datasets(df),
        "completeness_score": __get_completeness_score(df),
        "metadata_version": __get_metadata_version(df),
        "genotypes": __get_genotypes(df),
        "modalities": __get_modalities(df),
        "award_numbers": __get_award_numbers(df),
        "techniques": techniques,
        # Misspelled key kept as an alias so existing consumers keep working.
        "tecniques": techniques,
        "affiliations": __get_affiliations(df),
        "contributors": __get_contributors(df),
        "projects": __get_projects(df),
    }

    return summary

0 commit comments

Comments
 (0)