Skip to content

Commit 7c386ab

Browse files
authored
Merge pull request #44 from brain-image-library/43-combined-affiliations
Update get.py
2 parents 6422a77 + c32a3b4 commit 7c386ab

File tree

1 file changed

+44
-1
lines changed

1 file changed

+44
-1
lines changed

braininventory/get.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,49 @@ def today():
6060
return pd.DataFrame()
6161

6262

63+
def __clean_affiliations(df):
    """
    Get affiliation frequency with duplicate spellings of the same
    institution combined into a single entry.

    The raw metadata contains the same institution under slightly different
    names (for example 'Allen Institute for Brain Science' with and without
    a trailing space, or 'University of California, Los Angeles' with and
    without the '(UCLA)' suffix). Left as-is, a pie chart of the counts
    shows one institution as several slices. This function folds every
    variant into its canonical name and sums the counts.

    Input: dataframe
    Output: dictionary mapping affiliation name to number of rows
    """
    # Known alternate spellings -> canonical name. Variants that differ only
    # by leading/trailing whitespace are handled by the strip() below and do
    # not need an entry here.
    aliases = {
        "University of California, Los Angeles (UCLA)": "University of California, Los Angeles",
    }

    # An empty dataframe (no 'affiliation' column) has nothing to count.
    if "affiliation" not in df:
        return {}

    raw_counts = df["affiliation"].value_counts().to_dict()

    affiliations = {}
    for name, count in raw_counts.items():
        # Normalise whitespace first so 'Allen ... Science ' and
        # 'Allen ... Science' land on the same key.
        key = name.strip() if isinstance(name, str) else name
        key = aliases.get(key, key)
        # Accumulate rather than assign so variants add up instead of
        # overwriting each other.
        affiliations[key] = affiliations.get(key, 0) + int(count)

    return affiliations
104+
105+
63106
def __get_affiliation_frequency(df):
64107
"""
65108
Get affiliation frequency.
@@ -180,7 +223,7 @@ def __get_contributors(df):
180223

181224
def __get_project_names(df):
182225
"""
183-
Gets the unique list of project names.
226+
Gets the unique list of project names.
184227
185228
Input: dataframe
186229
Output: list

0 commit comments

Comments
 (0)