@@ -130,36 +130,61 @@ def __get_size_statistics(df):
130130 return [min , max , average , std ]
131131
132132
133- url = 'https://download.brainimagelibrary.org/inventory/daily/reports/today.json'
134- file_path , _ = urllib .request .urlretrieve (url )
135- df = pd .read_json (file_path )
136-
137133def get_jsonFile (df ):
138- """
139- Input: dataframe
140- Output:open the jsonFile that was located in datasets Brain Image Library dataframe
141- """
142- isNotZero = df [df ["score" ] != 0.0 ] #only have files with the correct data
143- randomRow = isNotZero .iloc [random .randint (0 , len (isNotZero ))] #select a random row of random index
144- jsonFileLink = randomRow .json_file .replace ("/bil/data" , "https://download.brainimagelibrary.org" , 1 ) #create the link
145- result = requests .get (jsonFileLink )
134+ """
135+ Fetch the JSON metadata file of a randomly chosen dataset.
136+
137+ Rows whose 'score' is 0.0 are discarded, one remaining row is picked at random, and its 'json_file'
138+ path is turned into a URL by replacing the first '/bil/data' with 'https://download.brainimagelibrary.org'.
139+ NOTE(review): random.randint() includes its upper bound, so the index can equal len(isNotZero) and fail.
140+
141+ Parameters:
142+ df (pandas.DataFrame): The input DataFrame containing the 'score' and 'json_file' columns.
143+
144+ Returns:
145+ The parsed JSON content of the fetched file, as returned by the response's 'json()' method.
146+ """
147+
148+ isNotZero = df [df ["score" ] != 0.0 ] # only have files with the correct data
149+ randomRow = isNotZero .iloc [
150+ random .randint (0 , len (isNotZero ))
151+ ] # select a random row of random index
152+ jsonFileLink = randomRow .json_file .replace (
153+ "/bil/data" , "https://download.brainimagelibrary.org" , 1
154+ ) # create the link
155+ result = requests .get (jsonFileLink )
156+
157+ return result .json ()
146158
147- return result .json ()
148159
149160def get_date (df ):
150- """
151- Input: dateframe
152- Output: date data was created in year-day-month format
153- """
154- jsonFile = get_jsonFile (df ) #get the jsonFile information with get_jsonFile() function
155- dateList = jsonFile ['creation_date' ].split () #get creation_date
156- mntList = dict ((month , index ) for index , month in enumerate (calendar .month_abbr ) if month ) #month abbr to number
157- yr = dateList [4 ] #get year
158- mnt = mntList [dateList [1 ]] #get month
159- day = dateList [2 ] #get day
160- return f"{ yr } -{ day } -{ mnt } " #format in year-day-month
161-
162-
161+ """
162+ Extract and format the creation date of a randomly chosen dataset.
163+
164+ This function takes a pandas DataFrame as input and reads the 'creation_date' field from the JSON
165+ file returned by 'get_jsonFile()'. It then splits that string on whitespace and reformats it as
166+ 'year-day-month', with the month given as a number (e.g., '2023-24-7').
167+
168+ Parameters:
169+ df (pandas.DataFrame): The input DataFrame, passed on to 'get_jsonFile()'.
170+
171+ Returns:
172+ str: A string representing the formatted date in the 'year-day-month' format.
173+ """
174+
175+ jsonFile = get_jsonFile (
176+ df
177+ ) # get the jsonFile information with get_jsonFile() function
178+ dateList = jsonFile ["creation_date" ].split () # get creation_date
179+ mntList = dict (
180+ (month , index ) for index , month in enumerate (calendar .month_abbr ) if month
181+ ) # month abbr to number
182+ yr = dateList [4 ] # get year
183+ mnt = mntList [dateList [1 ]] # get month
184+ day = dateList [2 ] # get day
185+ return f"{ yr } -{ day } -{ mnt } " # format in year-day-month
186+
187+
163188def today ():
164189 """
165190 Get today's snapshot of Brain Image Library.
@@ -566,7 +591,17 @@ def __get_cnbtaxonomy(df):
566591
567592def __get_genotypes (df ):
568593 """
569- Write documentation here.
594+ Get unique genotypes from the DataFrame.
595+
596+ This function takes a pandas DataFrame as input and extracts the unique values from the 'genotype'
597+ column of the DataFrame. It returns an array containing the unique genotypes present in the 'genotype'
598+ column.
599+
600+ Parameters:
601+ df (pandas.DataFrame): The input DataFrame containing the 'genotype' column.
602+
603+ Returns:
604+ numpy.ndarray: An array containing the unique genotypes found in the 'genotype' column.
570605 """
571606 return df ["genotype" ].unique ()
572607
0 commit comments