@@ -130,36 +130,61 @@ def __get_size_statistics(df):
130
130
return [min , max , average , std ]
131
131
132
132
133
- url = 'https://download.brainimagelibrary.org/inventory/daily/reports/today.json'
134
- file_path , _ = urllib .request .urlretrieve (url )
135
- df = pd .read_json (file_path )
136
-
137
133
def get_jsonFile(df):
    """
    Download the metadata JSON file of one randomly chosen dataset.

    Rows whose 'score' equals 0.0 are dropped, one of the remaining rows is
    picked at random, and the leading '/bil/data' of its 'json_file' value is
    replaced with 'https://download.brainimagelibrary.org' to build the
    download URL.

    Parameters:
        df (pandas.DataFrame): DataFrame providing the 'score' and
            'json_file' columns.

    Returns:
        The decoded JSON body of the downloaded file, as produced by
        ``requests.Response.json()``.

    Raises:
        IndexError: If no row has a non-zero score.
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    isNotZero = df[df["score"] != 0.0]  # only keep rows with a non-zero score
    if len(isNotZero) == 0:
        raise IndexError("no rows with a non-zero score to choose from")

    # randrange() excludes its upper bound. The previous
    # randint(0, len(isNotZero)) could return len(isNotZero) itself, which is
    # one past the last valid position for .iloc.
    randomRow = isNotZero.iloc[random.randrange(len(isNotZero))]

    # Only the first occurrence of the prefix is rewritten (count=1).
    jsonFileLink = randomRow.json_file.replace(
        "/bil/data", "https://download.brainimagelibrary.org", 1
    )

    # A timeout keeps a stalled server from blocking the caller forever.
    result = requests.get(jsonFileLink, timeout=60)
    # Surface HTTP errors explicitly instead of trying to parse an error page.
    result.raise_for_status()

    return result.json()
148
159
149
160
def get_date(df):
    """
    Extract and format the creation date of a randomly selected dataset.

    The metadata JSON is obtained through ``get_jsonFile(df)``. Its
    'creation_date' value is split on whitespace; the fifth token is used as
    the year, the third as the day, and the second (a month abbreviation such
    as 'Jul') is converted to its month number.
    NOTE(review): this presumably expects a ctime-like string such as
    'Mon Jul 24 10:15:00 2023' - confirm against the actual JSON files.

    Parameters:
        df (pandas.DataFrame): The DataFrame handed to ``get_jsonFile()``.

    Returns:
        str: The date in 'year-day-month' order with a numeric month,
        e.g. '2023-24-7'.

    Raises:
        KeyError: If the JSON has no 'creation_date' entry or the month
            token is not a known month abbreviation.
        IndexError: If 'creation_date' has fewer than five
            whitespace-separated tokens.
    """
    jsonFile = get_jsonFile(df)  # metadata of one randomly chosen dataset
    dateList = jsonFile["creation_date"].split()

    # calendar.month_abbr[0] is an empty string; `if month` skips it so the
    # mapping goes from abbreviation to month number 1..12.
    mntList = {
        month: index for index, month in enumerate(calendar.month_abbr) if month
    }

    yr = dateList[4]  # year
    mnt = mntList[dateList[1]]  # month abbreviation -> month number
    day = dateList[2]  # day of month
    return f"{yr}-{day}-{mnt}"  # year-day-month
186
+
187
+
163
188
def today ():
164
189
"""
165
190
Get today's snapshot of Brain Image Library.
@@ -566,7 +591,17 @@ def __get_cnbtaxonomy(df):
566
591
567
592
def __get_genotypes(df):
    """
    Return the distinct values of the 'genotype' column.

    Parameters:
        df (pandas.DataFrame): DataFrame that contains a 'genotype' column.

    Returns:
        The array of unique genotype values produced by calling
        ``unique()`` on the 'genotype' column.
    """
    genotype_column = df["genotype"]
    return genotype_column.unique()
572
607
0 commit comments