3
3
import json
4
4
from datetime import date
5
5
6
+
6
7
def today ():
7
8
"""
8
9
Get today's snapshot of Brain Image Library.
@@ -23,95 +24,130 @@ def today():
23
24
else :
24
25
print ("Error: Failed to fetch JSON data" )
25
26
return pd .DataFrame ()
26
-
27
+
28
+
27
29
def __get_number_of_datasets(df):
    """
    Count the rows of the dataframe.

    Input: dataframe
    Output: number of rows
    """
    number_of_rows = len(df)
    return number_of_rows
31
+
29
32
30
33
def __get_completeness_score(df):
    """
    Get the sum of the score column divided by the number of rows.

    Input: dataframe with a numeric score column
    Output: average score over all rows, or 0.0 when the dataframe has no
            rows (today() returns an empty dataframe when the fetch fails)
    """
    # len() is used because the truth value of a dataframe is ambiguous.
    if len(df) == 0:
        return 0.0

    return df["score"].sum() / len(df)
35
+
32
36
33
37
def __is_reachable(url, timeout=10):
    """
    Check whether a URL answers an HTTP GET request with status code 200.

    Input: url to request; timeout in seconds for the request (default 10)
    Output: True when the response status code is 200, False otherwise,
            including when the request fails or times out
    """
    try:
        # Without a timeout a silent server would block the caller forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # A single unreachable URL must not abort the whole report.
        return False

    return response.status_code == 200
40
45
41
46
def __get_metadata_version(df):
    """
    Tally how often each value occurs in the metadata_version column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["metadata_version"].value_counts()
    return counts.to_dict()
48
+
43
49
44
50
def __get_contributor(df):
    """
    Tally how often each value occurs in the contributor column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["contributor"].value_counts()
    return counts.to_dict()
52
+
46
53
47
54
def __get_affilation(df):
    """
    Tally how often each value occurs in the affiliation column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["affiliation"].value_counts()
    return counts.to_dict()
56
+
57
def __get_awards(df):
    """
    Get the distinct values of the award_number column.

    Input: dataframe
    Output: array of unique award numbers
    """
    award_numbers = df["award_number"]
    return award_numbers.unique()
49
59
50
60
def __get_award_number(df):
    """
    Tally how often each value occurs in the award_number column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["award_number"].value_counts()
    return counts.to_dict()
62
+
52
63
53
64
def __get_species(df):
    """
    Tally how often each value occurs in the species column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["species"].value_counts()
    return counts.to_dict()
66
+
55
67
56
68
def __get_cnbtaxonomy(df):
    """
    Tally how often each value occurs in the cnbtaxonomy column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["cnbtaxonomy"].value_counts()
    return counts.to_dict()
70
+
58
71
59
72
def __get_samplelocalid(df):
    """
    Tally how often each value occurs in the samplelocalid column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["samplelocalid"].value_counts()
    return counts.to_dict()
74
+
61
75
62
76
def __get_genotype(df):
    """
    Tally how often each value occurs in the genotype column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["genotype"].value_counts()
    return counts.to_dict()
78
+
64
79
65
80
def __get_generalmodality(df):
    """
    Tally how often each value occurs in the generalmodality column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["generalmodality"].value_counts()
    return counts.to_dict()
82
+
67
83
68
84
def __get_technique(df):
    """
    Tally how often each value occurs in the technique column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["technique"].value_counts()
    return counts.to_dict()
86
+
70
87
71
88
def __get_locations(df):
    """
    Tally how often each value occurs in the locations column.

    Input: dataframe
    Output: dictionary mapping each value to its number of occurrences
    """
    counts = df["locations"].value_counts()
    return counts.to_dict()
90
+
91
def __get_contributors(df):
    """
    Get the distinct values of the contributorname column.

    Input: dataframe
    Output: array of unique contributor names
    """
    contributor_names = df["contributorname"]
    return contributor_names.unique()
96
+
97
+
98
def __get_project_names(df):
    """
    Get the distinct values of the project column.

    Input: dataframe
    Output: array of unique project names
    """
    projects = df["project"]
    return projects.unique()
106
+
107
def __get_list_of_projects(df):
    """
    Get the list of names for unique projects.

    Input parameter: dataframe
    Output: list of the distinct values of the project column
    """
    # unique() returns an array, which has no to_dict(); build the list
    # the docstring promises instead.
    return list(df["project"].unique())
116
+
117
def __get_number_of_projects(df):
    """
    Count the distinct values of the project column.

    Input parameter: dataframe
    Output: number of unique projects
    """
    unique_projects = df["project"].unique()
    return len(unique_projects)
73
126
74
127
def report():
    """
    Build a summary report of the dataframe returned by today().

    Output: dictionary holding the date as a YYYY-MM-DD string, the number of
            datasets, the completeness score, the value counts of the
            descriptive columns, and the result of applying the reachability
            check to every entry of the URL column
    """
    # Today's date as a string
    tdate = date.today().strftime("%Y-%m-%d")

    df = today()

    summary = {
        "date": tdate,
        "number_of_datasets": __get_number_of_datasets(df),
        "completeness_score": __get_completeness_score(df),
        "metadata_version": __get_metadata_version(df),
        "contributor": __get_contributor(df),
        "affiliation": __get_affilation(df),
        "award_number": __get_award_number(df),
        "species": __get_species(df),
        "cnbtaxonomy": __get_cnbtaxonomy(df),
        "samplelocalid": __get_samplelocalid(df),
        "genotype": __get_genotype(df),
        "generalmodality": __get_generalmodality(df),
        "technique": __get_technique(df),
        "locations": __get_locations(df),
        # Stored as the Series produced by apply(), one entry per URL.
        "is_reachable": df["URL"].apply(__is_reachable),
    }

    return summary
0 commit comments