2
2
import pandas as pd
3
3
import json
4
4
from datetime import date
5
+ from pandarallel import pandarallel
6
+
7
+ pandarallel .initialize (nb_workers = 8 , progress_bar = True )
8
+
5
9
6
10
def today ():
7
11
"""
8
- Get today's snapshot of Brain Image Library.
9
- """
12
+ Get today's snapshot of Brain Image Library.
13
+ """
10
14
11
15
server = "https://download.brainimagelibrary.org/inventory/daily/reports/"
12
16
filename = "today.json"
@@ -23,38 +27,84 @@ def today():
23
27
else :
24
28
print ("Error: Failed to fetch JSON data" )
25
29
return pd .DataFrame ()
26
-
30
+
31
+
27
32
def __get_number_of_datasets(df: pd.DataFrame) -> int:
    """Return the number of datasets in the snapshot.

    Args:
        df: Inventory snapshot with one row per dataset.

    Returns:
        The number of rows in ``df`` (0 for an empty frame).
    """
    return len(df)
34
+
29
35
30
36
def __get_completeness_score(df: pd.DataFrame) -> float:
    """Return the average of the ``score`` column over all rows.

    The sum of ``score`` is divided by the total number of rows, so rows
    whose score is missing still count in the denominator.

    Args:
        df: Inventory snapshot containing a ``score`` column.

    Returns:
        The average score, or ``0.0`` when ``df`` has no rows.
    """
    # An empty frame (e.g. a failed fetch) has no "score" column and would
    # otherwise fail with a KeyError or a division by zero.
    if df.empty:
        return 0.0

    return float(df["score"].sum() / len(df))
38
+
32
39
33
def __is_it_reachable(url, timeout=10.0):
    """Check whether a URL answers an HTTP GET with status 200.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        bool: True when the final response status is 200; False for any
        other status or when the request fails (connection error, timeout,
        malformed URL, ...).
    """
    try:
        # stream=True defers the body download; only the status line is
        # needed. The timeout keeps an unresponsive server from blocking
        # the caller forever.
        response = requests.get(url, timeout=timeout, stream=True)
    except requests.RequestException:
        # A failed request means the URL is not reachable; report that
        # instead of aborting the caller.
        return False

    try:
        return response.status_code == 200
    finally:
        # Release the connection since the body is never consumed.
        response.close()
47
+
48
+
49
def __are_reachable(df):
    """Compute the fraction of datasets whose ``URL`` is reachable.

    Every value in the ``URL`` column is probed with ``__is_it_reachable``
    through pandarallel's ``parallel_apply``. The per-row result is stored
    in a new ``is_reachable`` column on ``df`` itself (the frame is modified
    in place).

    Args:
        df: Inventory snapshot containing a ``URL`` column.

    Returns:
        float: Number of reachable rows divided by the total number of
        rows, or ``0.0`` when ``df`` has no rows.
    """
    print("Computing what datasets are reachable")

    # Nothing to probe; also avoids a KeyError / division by zero on the
    # empty frame produced by a failed fetch.
    if df.empty:
        return 0.0

    df["is_reachable"] = df["URL"].parallel_apply(__is_it_reachable)
    return float(df["is_reachable"].sum() / len(df))
40
54
41
55
def __get_metadata_version(df: pd.DataFrame) -> dict:
    """Count how many datasets use each metadata version.

    Args:
        df: Inventory snapshot containing a ``metadata_version`` column.

    Returns:
        Mapping of each distinct ``metadata_version`` value to its number
        of rows. Missing values are not counted (pandas ``value_counts``
        default). An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["metadata_version"].value_counts().to_dict()
57
+
58
+
59
def __get_genotypes(df: pd.DataFrame) -> dict:
    """Count how many datasets are recorded for each genotype.

    Args:
        df: Inventory snapshot containing a ``genotype`` column.

    Returns:
        Mapping of each distinct ``genotype`` value to its number of rows.
        Missing values are not counted (pandas ``value_counts`` default).
        An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["genotype"].value_counts().to_dict()
61
+
62
+
63
def __get_modalities(df: pd.DataFrame) -> dict:
    """Count how many datasets are recorded for each general modality.

    Args:
        df: Inventory snapshot containing a ``generalmodality`` column.

    Returns:
        Mapping of each distinct ``generalmodality`` value to its number of
        rows. Missing values are not counted (pandas ``value_counts``
        default). An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["generalmodality"].value_counts().to_dict()
65
+
66
+
67
def __get_techniques(df: pd.DataFrame) -> dict:
    """Count how many datasets are recorded for each technique.

    Args:
        df: Inventory snapshot containing a ``technique`` column.

    Returns:
        Mapping of each distinct ``technique`` value to its number of rows.
        Missing values are not counted (pandas ``value_counts`` default).
        An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["technique"].value_counts().to_dict()
69
+
70
+
71
def __get_award_numbers(df: pd.DataFrame) -> dict:
    """Count how many datasets are recorded for each award number.

    Args:
        df: Inventory snapshot containing an ``award_number`` column.

    Returns:
        Mapping of each distinct ``award_number`` value to its number of
        rows. Missing values are not counted (pandas ``value_counts``
        default). An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["award_number"].value_counts().to_dict()
73
+
74
+
75
def __get_affiliations(df: pd.DataFrame) -> dict:
    """Count how many datasets are recorded for each affiliation.

    Args:
        df: Inventory snapshot containing an ``affiliation`` column.

    Returns:
        Mapping of each distinct ``affiliation`` value to its number of
        rows. Missing values are not counted (pandas ``value_counts``
        default). An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["affiliation"].value_counts().to_dict()
77
+
78
+
79
def __get_contributors(df: pd.DataFrame) -> dict:
    """Count how many datasets are recorded for each contributor name.

    Args:
        df: Inventory snapshot containing a ``contributorname`` column.

    Returns:
        Mapping of each distinct ``contributorname`` value to its number of
        rows. Missing values are not counted (pandas ``value_counts``
        default). An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["contributorname"].value_counts().to_dict()
81
+
82
+
83
def __get_projects(df: pd.DataFrame) -> dict:
    """Count how many datasets are recorded for each project.

    Args:
        df: Inventory snapshot containing a ``project`` column.

    Returns:
        Mapping of each distinct ``project`` value to its number of rows.
        Missing values are not counted (pandas ``value_counts`` default).
        An empty frame yields ``{}``.
    """
    # An empty frame (e.g. a failed fetch) has no columns to count.
    if df.empty:
        return {}

    return df["project"].value_counts().to_dict()
85
+
43
86
44
87
def report():
    """Build a summary report from today's inventory snapshot.

    The snapshot is obtained from ``today()`` and summarised with the
    module's private ``__get_*`` helpers.

    Returns:
        dict: Keys, in insertion order: ``date`` (today's date formatted as
        ``YYYY-MM-DD``), ``number_of_datasets``, ``completeness_score``,
        ``metadata_version``, ``genotypes``, ``modalities``,
        ``award_numbers``, ``tecniques``, ``affiliations``,
        ``contributors`` and ``projects``.
    """
    # Today's date as a YYYY-MM-DD string.
    tdate = date.today().strftime("%Y-%m-%d")

    df = today()

    # NOTE(review): the reachability ratio (`__are_reachable(df)`) is
    # currently not part of the report -- confirm whether it should be
    # re-enabled under the "are_reachable" key.
    # NOTE(review): the key "tecniques" is misspelled; it is kept as-is
    # because renaming it would change the report's public shape.
    summary = {
        "date": tdate,
        "number_of_datasets": __get_number_of_datasets(df),
        "completeness_score": __get_completeness_score(df),
        "metadata_version": __get_metadata_version(df),
        "genotypes": __get_genotypes(df),
        "modalities": __get_modalities(df),
        "award_numbers": __get_award_numbers(df),
        "tecniques": __get_techniques(df),
        "affiliations": __get_affiliations(df),
        "contributors": __get_contributors(df),
        "projects": __get_projects(df),
    }

    return summary
0 commit comments