You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardExpand all lines: braininventory/get.py
+60-55Lines changed: 60 additions & 55 deletions
Original file line number
Diff line number
Diff line change
@@ -26,6 +26,49 @@ def today():
26
26
returnpd.DataFrame()
27
27
28
28
29
+
def __clean_affiliations(df):
    """
    Count datasets per affiliation, merging known spelling variants.

    The raw "affiliation" column contains the same institution under
    several spellings (for example 'Allen Institute for Brain Science'
    with and without a trailing space, and 'University of California,
    Los Angeles' with and without the '(UCLA)' suffix). Counting them
    separately splits one institution across several pie-chart slices,
    so every variant is folded into a single canonical name and the
    counts are summed.

    :param df: table with an "affiliation" column
        (presumably a pandas DataFrame, one row per dataset -- confirm).
    :return: dict mapping canonical affiliation name to its total count.
    :raises KeyError: if ``df`` has no "affiliation" column.
    """
    # Variant spelling -> canonical spelling. Whitespace-only variants do
    # not need an entry here; they are handled by strip() below.
    aliases = {
        "University of California, Los Angeles (UCLA)": (
            "University of California, Los Angeles"
        ),
    }

    raw_counts = df["affiliation"].value_counts().to_dict()

    affiliations = {}
    for name, count in raw_counts.items():
        # Only strings can carry stray whitespace; leave other keys as-is.
        canonical = name.strip() if isinstance(name, str) else name
        canonical = aliases.get(canonical, canonical)
        # Accumulate rather than assign so variants add up instead of
        # overwriting each other.
        affiliations[canonical] = affiliations.get(canonical, 0) + count
    return affiliations
70
+
71
+
29
72
def __get_number_of_datasets(df):
    """
    Return the number of entries in ``df``.

    :param df: any sized container; presumably a pandas DataFrame with
        one row per dataset -- confirm against the caller.
    :return: ``len(df)``.
    """
    return len(df)
31
74
@@ -54,9 +97,11 @@ def __get_contributor(df):
54
97
def __get_affilation(df):
    """
    Count how many rows carry each value of the "affiliation" column.

    :param df: table with an "affiliation" column
        (presumably a pandas DataFrame -- confirm).
    :return: dict mapping each affiliation value to its row count.
    :raises KeyError: if ``df`` has no "affiliation" column.
    """
    return df["affiliation"].value_counts().to_dict()
56
99
100
+
57
101
def __get_awards(df):
    """
    Return the distinct values of the "award_number" column.

    :param df: table with an "award_number" column
        (presumably a pandas DataFrame -- confirm).
    :return: whatever ``.unique()`` yields for that column; for a pandas
        Series this is an array of distinct values in order of first
        appearance.
    :raises KeyError: if ``df`` has no "award_number" column.
    """
    return df["award_number"].unique()
59
103
104
+
60
105
def __get_award_number(df):
    """
    Count how many rows carry each value of the "award_number" column.

    :param df: table with an "award_number" column
        (presumably a pandas DataFrame -- confirm).
    :return: dict mapping each award number to its row count.
    :raises KeyError: if ``df`` has no "award_number" column.
    """
    return df["award_number"].value_counts().to_dict()
62
107
@@ -88,6 +133,7 @@ def __get_technique(df):
88
133
def __get_locations(df):
    """
    Count how many rows carry each value of the "locations" column.

    :param df: table with a "locations" column
        (presumably a pandas DataFrame -- confirm).
    :return: dict mapping each locations value to its row count.
    :raises KeyError: if ``df`` has no "locations" column.
    """
    return df["locations"].value_counts().to_dict()
90
135
136
+
91
137
def__get_contributors(df):
92
138
"""
93
139
This returns an array of contributor names from the contributorname column.
#Need to combine the universities so the pie chart shows a single university's total samples under the same area.
156
-
#right now there is one area of the pie chart that says 'Allen Institute for Brain Science' and another that says 'Allen Institute for Brain Science ' (with a trailing space!)
157
-
#we need it to recognize that the Allen Institute for Brain Science has contributed a total number of samples equal to the sum of both those areas in the pie chart
158
-
Allen=df[df['affiliation'] =='Allen Institute for Brain Science' ]
159
-
print(len(Allen))
160
-
161
-
#^we set a variable 'Allen' equal to the dataframe limited to the rows where it said 'Allen Institute for Brain Science and asked for the count of those rows
162
-
#below, we did the same thing but with rows that had Allen with a space
163
-
164
-
Allen_with_space=df[df['affiliation'] =='Allen Institute for Brain Science ' ]
165
-
print(len(Allen_with_space))
166
-
167
-
accurate_Allen=len(Allen) +len(Allen_with_space)
168
-
print(accurate_Allen)
169
-
170
-
delaffiliations['Allen Institute for Brain Science ']
171
-
print(affiliations)
172
-
173
-
#Now we can add the counts we had before. (We deleted Allen with a space, but we still have the number of Allen with a space)
174
-
#reassign the Allen Institute for Brain Science variable to the actual total number (4715)
175
-
176
-
affiliations['Allen Institute for Brain Science'] =len(Allen) +len(Allen_with_space)
177
-
print(affiliations)
178
-
179
-
#Now we need to do the same thing with UCLA
180
-
# 1) limit the dictionary to ones that read 'University of California, Los Angeles' and the one that says 'University of California, Los Angeles (UCLA)'
181
-
182
-
No_UCLA=affiliations['University of California, Los Angeles']
183
-
print(No_UCLA)
184
-
185
-
UCLA_present=affiliations['University of California, Los Angeles (UCLA)']
186
-
print(UCLA_present)
187
-
188
-
# 2) Now that we have the counts of both, set the college's count to the sum of the category without UCLA and the category with UCLA, then delete the one we don't want.
189
-
190
-
#del affiliations['University of California, Los Angeles (UCLA)']
191
-
#print(len(affiliations))
192
-
193
-
accurate_Uni=affiliations['University of California, Los Angeles'] +affiliations['University of California, Los Angeles (UCLA)']
194
-
print(accurate_Uni)
195
-
196
-
delaffiliations['University of California, Los Angeles (UCLA)']
0 commit comments