- Reformat the `get_jsonFile` function, which downloads the JSON metadata file of a randomly selected dataset in the DataFrame.

Ivan Cao-Berg · Ivan Cao-Berg · commit b55f2256ea3f · 2023-07-24T01:38:26.000-04:00
- Reformat the `get_date` function, which extracts and formats the creation date of a dataset in the DataFrame.
- Remove the module-level code for downloading and reading the JSON file.
- Add documentation for the `get_jsonFile` function.
- Add documentation for the `get_date` function.
- Add documentation for the `__get_genotypes` function.
diff --git a/braininventory/get.py b/braininventory/get.py
@@ -130,36 +130,61 @@ def __get_size_statistics(df):
     return [min, max, average, std]
 
 
-url = 'https://download.brainimagelibrary.org/inventory/daily/reports/today.json'
-file_path, _ = urllib.request.urlretrieve(url)
-df = pd.read_json(file_path)
-
 def get_jsonFile(df):
-    	"""
-    	Input: dataframe
-    	Output:open the jsonFile that was located in datasets Brain Image Library dataframe
-    	"""
-    	isNotZero = df[df["score"] != 0.0] #only have files with the correct data
-    	randomRow = isNotZero.iloc[random.randint(0, len(isNotZero))] #select a random row of random index
-    	jsonFileLink = randomRow.json_file.replace("/bil/data", "https://download.brainimagelibrary.org", 1) #create the link
-    	result = requests.get(jsonFileLink)
+    """
+    Download the JSON metadata file of a randomly selected dataset.
+
+    This function takes a pandas DataFrame as input, keeps only the rows whose 'score' is not 0.0,
+    picks one of those rows at random, and turns its 'json_file' path into a URL by replacing the first
+    occurrence of '/bil/data' with 'https://download.brainimagelibrary.org'. The URL is then requested.
+
+    Parameters:
+        df (pandas.DataFrame): The input DataFrame containing the 'score' and 'json_file' columns.
+
+    Returns:
+        The decoded JSON content of the response, as returned by 'result.json()'.
+    """
+
+    isNotZero = df[df["score"] != 0.0]  # only have files with the correct data
+    randomRow = isNotZero.iloc[
+        random.randint(0, len(isNotZero))
+    ]  # select a random row of random index
+    jsonFileLink = randomRow.json_file.replace(
+        "/bil/data", "https://download.brainimagelibrary.org", 1
+    )  # create the link
+    result = requests.get(jsonFileLink)
+
+    return result.json()
 
-    	return result.json()
 
 def get_date(df):
-	"""
- 	Input: dateframe
-  	Output: date data was created in year-day-month format
-   	"""
-	jsonFile = get_jsonFile(df) #get the jsonFile information with get_jsonFile() function
-	dateList = jsonFile['creation_date'].split() #get creation_date
-    	mntList = dict((month, index) for index, month in enumerate(calendar.month_abbr) if month) #month abbr to number
-    	yr = dateList[4] #get year
-    	mnt= mntList[dateList[1]] #get month
-    	day = dateList[2] #get day
-    	return f"{yr}-{day}-{mnt}" #format in year-day-month
-    
-    
+    """
+    Extract and format the creation date of a dataset from the DataFrame.
+
+    This function takes a pandas DataFrame as input and reads the 'creation_date' field of the JSON
+    file returned by 'get_jsonFile()'. It splits that field on whitespace and combines the year, the day
+    and the month number into the 'year-day-month' format (e.g., '2023-24-7').
+
+    Parameters:
+        df (pandas.DataFrame): The input DataFrame, passed on to 'get_jsonFile()'.
+
+    Returns:
+        str: A string representing the formatted date in the 'year-day-month' format.
+    """
+
+    jsonFile = get_jsonFile(
+        df
+    )  # get the jsonFile information with get_jsonFile() function
+    dateList = jsonFile["creation_date"].split()  # get creation_date
+    mntList = dict(
+        (month, index) for index, month in enumerate(calendar.month_abbr) if month
+    )  # month abbr to number
+    yr = dateList[4]  # get year
+    mnt = mntList[dateList[1]]  # get month
+    day = dateList[2]  # get day
+    return f"{yr}-{day}-{mnt}"  # format in year-day-month
+
+
 def today():
     """
     Get today's snapshot of Brain Image Library.
@@ -566,7 +591,17 @@ def __get_cnbtaxonomy(df):
 
 def __get_genotypes(df):
     """
-    Write documentation here.
+    Get unique genotypes from the DataFrame.
+
+    This function takes a pandas DataFrame as input and extracts the unique values from the 'genotype'
+    column of the DataFrame. It returns an array containing the unique genotypes present in the 'genotype'
+    column.
+
+    Parameters:
+        df (pandas.DataFrame): The input DataFrame containing the 'genotype' column.
+
+    Returns:
+        numpy.ndarray: An array containing the unique genotypes found in the 'genotype' column.
     """
     return df["genotype"].unique()