Skip to content

Commit 49f2fdf

Browse files
authored
Merge branch 'main' into 8-new-metric-request-of-datasets-that-exist-on-bildata
2 parents c89d06e + eefec85 commit 49f2fdf

File tree

3 files changed

+109
-63
lines changed

3 files changed

+109
-63
lines changed

AUTHORS

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,17 @@ Ivan Cao-Berg (icaoberg) - Pittsburgh Supercomputing Center
44
## Undergraduate Students
55
Eduardo J. Figueroa (EduardoJFigueroa) - University of Puerto Rico
66

7-
# SAMS Data Science Group
7+
## CS Scholars Data Science Group
8+
Joshua Franco (francojoshua) - Western High School
9+
Nicolas Watkins (nicolasw-cmu) - Edmond Santa Fe High School
10+
Carmen Ung (cmucung) - Rosemead High School
11+
Marcuslyne Sieh (marcuslynes) - The Metropolitan Regional Career and Technical Center
12+
Manav Mahida (ManavMahida) - North Penn High School
13+
Temidayo Ogundare (Togundar) - Science Park High School
14+
Neptune Barton (jellyfishking-github) - Eldorado High School
15+
16+
17+
## SAMS Data Science Group
818
Tiffany Wang (Tffny3) - Cupertino High School
919
Eileen Lin (eileen-png) - Eleanor Roosevelt High School, eSTEM Academy
1020
Louis Lin (Bobvius) - Jack Britt High

braininventory/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .get import *
1+
from .get import *

braininventory/get.py

Lines changed: 97 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44
from datetime import date
55

6+
67
def today():
78
"""
89
Get today's snapshot of Brain Image Library.
@@ -23,95 +24,130 @@ def today():
2324
else:
2425
print("Error: Failed to fetch JSON data")
2526
return pd.DataFrame()
26-
27+
28+
2729
def __get_number_of_datasets(df):
28-
return len(df)
30+
return len(df)
31+
2932

3033
def __get_completeness_score(df):
31-
return df['score'].sum()/len(df)
34+
return df["score"].sum() / len(df)
35+
3236

3337
def __is_reachable(url):
34-
response = requests.get(url)
38+
response = requests.get(url)
39+
40+
if response.status_code == 200:
41+
return True
42+
else:
43+
return False
3544

36-
if response.status_code == 200:
37-
return True
38-
else:
39-
return False
4045

4146
def __get_metadata_version(df):
42-
return df['metadata_version'] .value_counts().to_dict()
47+
return df["metadata_version"].value_counts().to_dict()
48+
4349

4450
def __get_contributor(df):
45-
return df['contributor'].value_counts().to_dict()
51+
return df["contributor"].value_counts().to_dict()
52+
4653

4754
def __get_affilation(df):
48-
return df['affiliation'].value_counts().to_dict()
55+
return df["affiliation"].value_counts().to_dict()
56+
57+
def __get_awards(df):
    """
    Return the distinct values of the award_number column.

    Input: dataframe with an "award_number" column
    Output: array of unique award numbers
    """
    award_column = df["award_number"]
    return award_column.unique()
4959

5060
def __get_award_number(df):
51-
return df['award_number'].value_counts().to_dict()
61+
return df["award_number"].value_counts().to_dict()
62+
5263

5364
def __get_species(df):
54-
return df['species'].value_counts().to_dict()
65+
return df["species"].value_counts().to_dict()
66+
5567

5668
def __get_cnbtaxonomy(df):
57-
return df['cnbtaxonomy'].value_counts().to_dict()
69+
return df["cnbtaxonomy"].value_counts().to_dict()
70+
5871

5972
def __get_samplelocalid(df):
60-
return df['samplelocalid'].value_counts().to_dict()
73+
return df["samplelocalid"].value_counts().to_dict()
74+
6175

6276
def __get_genotype(df):
63-
return df['genotype'].value_counts().to_dict()
77+
return df["genotype"].value_counts().to_dict()
78+
6479

6580
def __get_generalmodality(df):
66-
return df['generalmodality'].value_counts().to_dict()
81+
return df["generalmodality"].value_counts().to_dict()
82+
6783

6884
def __get_technique(df):
69-
return df['technique'].value_counts().to_dict()
85+
return df["technique"].value_counts().to_dict()
86+
7087

7188
def __get_locations(df):
72-
return df['locations'].value_counts().to_dict()
89+
return df["locations"].value_counts().to_dict()
90+
91+
def __get_contributors(df):
    """
    Return the distinct values of the contributorname column as an array.
    """
    contributor_column = df["contributorname"]
    return contributor_column.unique()
96+
97+
98+
def __get_project_names(df):
    """
    Get the unique project names.

    Input: dataframe with a "project" column
    Output: array of the distinct values in that column
    """
    project_column = df["project"]
    return project_column.unique()
106+
107+
def __get_list_of_projects(df):
    """
    Get the list of names for unique projects.

    Input parameter: dataframe with a "project" column
    Output: list of unique project names
    """
    # Series.unique() returns an array, which has no to_dict() method, so
    # the previous `.unique().to_dict()` raised AttributeError on every
    # call. tolist() yields the plain list this function documents.
    return df["project"].unique().tolist()
116+
117+
def __get_number_of_projects(df):
    """
    Count the distinct projects.

    Input parameter: dataframe with a "project" column
    Output: number of unique values in that column
    """
    distinct_projects = df["project"].unique()
    return len(distinct_projects)
73126

74127
def report():
75128
# Get today's date
76-
tdate = date.today()
77-
78-
# Convert date to string
79-
tdate = tdate.strftime("%Y-%m-%d")
80-
81-
df = today()
82-
83-
report = {}
84-
report['date'] = tdate
85-
report['number_of_datasets'] = __get_number_of_datasets(df)
86-
report['completeness_score'] = __get_completeness_score(df)
87-
report['metadata_version'] = __get_metadata_version(df)
88-
report['contributor'] = __get_contributor(df)
89-
report['affiliation'] = __get_affilation(df)
90-
report['award_number'] = __get_award_number(df)
91-
report['species'] = __get_species(df)
92-
report['cnbtaxonomy'] = __get_cnbtaxonomy(df)
93-
report['samplelocalid'] = __get_samplelocalid(df)
94-
report['genotype'] = __get_genotype(df)
95-
report['generalmodality'] = __get_generalmodality(df)
96-
report['technique'] = __get_technique(df)
97-
report['locations'] = __get_locations(df)
98-
99-
report['is_reachable'] = df['URL'].apply(__is_reachable)
100-
101-
return report
102-
103-
#The following block is a function that finds the number of rows that have 'true' under the key 'exists'.
104-
def __get_exists_true(df):
105-
return len(df[df['exists']== True]) #The true listed in the dataframe is the Boolean true value.
106-
print(__get_exists_true(df))
107-
108-
#The following block is a function that finds the number of total rows.
109-
def __get_exists_total(df):
110-
return len(df) #len counts the number of rows in the dataframe.
111-
print(__get_exists_total(df))
112-
113-
#Now that we have the total number of exists and the total number of rows in the dataframe we can find the fraction of the total that exist using simple division.
114-
proportion = exists_true/exists_total
115-
print(proportion)
116-
print(f'The proportion of samples that exists is equal to ' + str(proportion) + '.') #The proportion is a variable with a numerical value that must be casted to add to other string objects in the print function.
117-
129+
tdate = date.today()
130+
131+
# Convert date to string
132+
tdate = tdate.strftime("%Y-%m-%d")
133+
134+
df = today()
135+
136+
report = {}
137+
report["date"] = tdate
138+
report["number_of_datasets"] = __get_number_of_datasets(df)
139+
report["completeness_score"] = __get_completeness_score(df)
140+
report["metadata_version"] = __get_metadata_version(df)
141+
report["contributor"] = __get_contributor(df)
142+
report["affiliation"] = __get_affilation(df)
143+
report["award_number"] = __get_award_number(df)
144+
report["species"] = __get_species(df)
145+
report["cnbtaxonomy"] = __get_cnbtaxonomy(df)
146+
report["samplelocalid"] = __get_samplelocalid(df)
147+
report["genotype"] = __get_genotype(df)
148+
report["generalmodality"] = __get_generalmodality(df)
149+
report["technique"] = __get_technique(df)
150+
report["locations"] = __get_locations(df)
151+
report["is_reachable"] = df["URL"].apply(__is_reachable)
152+
153+
return report

0 commit comments

Comments
 (0)