Skip to content

Commit b55f225

Browse files
author
Ivan Cao-Berg
committed
- Reformat the `get_jsonFile` function, which fetches the JSON metadata file of a randomly selected dataset in the DataFrame.
- Reformat the `get_date` function, which extracts and formats the creation date from that JSON file. - Remove the module-level code for downloading and reading the JSON file. - Add documentation for the `get_jsonFile` function. - Add documentation for the `get_date` function. - Add documentation for the `__get_genotypes` function.
1 parent 918fd9b commit b55f225

File tree

1 file changed

+62
-27
lines changed

1 file changed

+62
-27
lines changed

braininventory/get.py

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -130,36 +130,61 @@ def __get_size_statistics(df):
130130
return [min, max, average, std]
131131

132132

133-
url = 'https://download.brainimagelibrary.org/inventory/daily/reports/today.json'
134-
file_path, _ = urllib.request.urlretrieve(url)
135-
df = pd.read_json(file_path)
136-
137133
def get_jsonFile(df):
138-
"""
139-
Input: dataframe
140-
Output:open the jsonFile that was located in datasets Brain Image Library dataframe
141-
"""
142-
isNotZero = df[df["score"] != 0.0] #only have files with the correct data
143-
randomRow = isNotZero.iloc[random.randint(0, len(isNotZero))] #select a random row of random index
144-
jsonFileLink = randomRow.json_file.replace("/bil/data", "https://download.brainimagelibrary.org", 1) #create the link
145-
result = requests.get(jsonFileLink)
134+
"""
135+
Fetch the JSON metadata file of a randomly selected dataset.
136+
137+
This function takes a pandas DataFrame as input, keeps the rows whose 'score' is not 0.0, picks one
138+
of them at random, rewrites the '/bil/data' prefix of its 'json_file' path into the
139+
'https://download.brainimagelibrary.org' download URL, and requests that URL.
140+
141+
Parameters:
142+
df (pandas.DataFrame): The input DataFrame containing the 'score' and 'json_file' columns.
143+
144+
Returns:
145+
The parsed JSON content of the response (the result of 'result.json()').
146+
"""
147+
148+
isNotZero = df[df["score"] != 0.0] # only have files with the correct data
149+
randomRow = isNotZero.iloc[
150+
random.randint(0, len(isNotZero))
151+
] # select a random row of random index
152+
jsonFileLink = randomRow.json_file.replace(
153+
"/bil/data", "https://download.brainimagelibrary.org", 1
154+
) # create the link
155+
result = requests.get(jsonFileLink)
156+
157+
return result.json()
146158

147-
return result.json()
148159

149160
def get_date(df):
150-
"""
151-
Input: dateframe
152-
Output: date data was created in year-day-month format
153-
"""
154-
jsonFile = get_jsonFile(df) #get the jsonFile information with get_jsonFile() function
155-
dateList = jsonFile['creation_date'].split() #get creation_date
156-
mntList = dict((month, index) for index, month in enumerate(calendar.month_abbr) if month) #month abbr to number
157-
yr = dateList[4] #get year
158-
mnt= mntList[dateList[1]] #get month
159-
day = dateList[2] #get day
160-
return f"{yr}-{day}-{mnt}" #format in year-day-month
161-
162-
161+
"""
162+
Extract and format the creation date from the DataFrame.
163+
164+
This function takes a pandas DataFrame as input and reads the 'creation_date' field of the JSON
165+
file returned by the 'get_jsonFile()' function. It splits that field on whitespace and reformats it
166+
into the 'year-day-month' format with a numeric month (e.g., '2023-24-7').
167+
168+
Parameters:
169+
df (pandas.DataFrame): The input DataFrame passed to 'get_jsonFile()'.
170+
171+
Returns:
172+
str: A string representing the formatted date in the 'year-day-month' format.
173+
"""
174+
175+
jsonFile = get_jsonFile(
176+
df
177+
) # get the jsonFile information with get_jsonFile() function
178+
dateList = jsonFile["creation_date"].split() # get creation_date
179+
mntList = dict(
180+
(month, index) for index, month in enumerate(calendar.month_abbr) if month
181+
) # month abbr to number
182+
yr = dateList[4] # get year
183+
mnt = mntList[dateList[1]] # get month
184+
day = dateList[2] # get day
185+
return f"{yr}-{day}-{mnt}" # format in year-day-month
186+
187+
163188
def today():
164189
"""
165190
Get today's snapshot of Brain Image Library.
@@ -566,7 +591,17 @@ def __get_cnbtaxonomy(df):
566591

567592
def __get_genotypes(df):
568593
"""
569-
Write documentation here.
594+
Get unique genotypes from the DataFrame.
595+
596+
This function takes a pandas DataFrame as input and extracts the unique values from the 'genotype'
597+
column of the DataFrame. It returns an array containing the unique genotypes present in the 'genotype'
598+
column.
599+
600+
Parameters:
601+
df (pandas.DataFrame): The input DataFrame containing the 'genotype' column.
602+
603+
Returns:
604+
numpy.ndarray: An array containing the unique genotypes found in the 'genotype' column.
570605
"""
571606
return df["genotype"].unique()
572607

0 commit comments

Comments
 (0)