|
1 | 1 | import json
|
2 | 2 | from datetime import date
|
| 3 | +import calendar |
| 4 | +import pandas as pd |
| 5 | +import urllib.request |
| 6 | +import random |
| 7 | +import requests |
| 8 | +import calendar |
3 | 9 |
|
4 | 10 | import humanize
|
5 | 11 | import matplotlib.pyplot as plt
|
@@ -124,6 +130,61 @@ def __get_size_statistics(df):
|
124 | 130 | return [min, max, average, std]
|
125 | 131 |
|
126 | 132 |
|
def get_jsonFile(df):
    """
    Download the JSON metadata file of one randomly chosen entry with a non-zero score.

    Rows whose 'score' equals 0.0 are discarded, one of the remaining rows is picked at random,
    and its 'json_file' path is turned into a download URL by replacing the first occurrence of
    '/bil/data' with 'https://download.brainimagelibrary.org'. The document at that URL is then
    fetched and decoded as JSON.

    Parameters:
        df (pandas.DataFrame): The input DataFrame; it must contain the 'score' and 'json_file'
            columns.

    Returns:
        The decoded JSON content of the downloaded file (the result of 'Response.json()').

    Raises:
        IndexError: If no row has a non-zero score.
        requests.RequestException: If the download fails, times out, or the server answers with
            an HTTP error status.
    """

    # Rows with a zero score are skipped (the original author treats them as lacking correct data).
    scored = df[df["score"] != 0.0]
    if scored.empty:
        raise IndexError("no rows with a non-zero score to pick a JSON file from")

    # randrange() excludes its upper bound. The previous randint(0, len(...)) could return
    # len(...) itself, which made .iloc fail intermittently with an out-of-bounds IndexError.
    row = scored.iloc[random.randrange(len(scored))]
    url = row["json_file"].replace(
        "/bil/data", "https://download.brainimagelibrary.org", 1
    )  # create the link

    # A timeout keeps an unresponsive server from blocking forever; raise_for_status() reports
    # HTTP errors directly instead of failing later while decoding an error page as JSON.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    return response.json()
| 158 | + |
| 159 | + |
def get_date(df):
    """
    Extract and format the creation date of a randomly chosen JSON metadata file.

    This function fetches one JSON metadata document through 'get_jsonFile()', splits its
    'creation_date' string on whitespace, and rebuilds the date in 'year-day-month' order. The
    month abbreviation is converted to its month number, which is not zero-padded.

    NOTE(review): the token positions used below (month at index 1, day at index 2, year at
    index 4) presumably match a ctime-like string such as 'Mon Jul 24 10:15:30 2023', which
    would yield '2023-24-7' - confirm against real metadata files.

    Parameters:
        df (pandas.DataFrame): The input DataFrame, passed unchanged to 'get_jsonFile()'.

    Returns:
        str: The formatted date in 'year-day-month' order with a numeric month.

    Raises:
        KeyError: If the metadata has no 'creation_date' entry or the month abbreviation is not
            found in 'calendar.month_abbr'.
        IndexError: If 'creation_date' has fewer than five whitespace-separated tokens.
    """

    metadata = get_jsonFile(df)  # JSON content of one randomly selected entry
    tokens = metadata["creation_date"].split()

    # Month abbreviation -> month number. calendar.month_abbr starts with an empty string at
    # index 0, which the filter drops. The abbreviations follow the current locale.
    month_numbers = {
        abbr: number for number, abbr in enumerate(calendar.month_abbr) if abbr
    }

    year = tokens[4]
    month = month_numbers[tokens[1]]
    day = tokens[2]
    return f"{year}-{day}-{month}"  # format in year-day-month
| 186 | + |
| 187 | + |
127 | 188 | def today():
|
128 | 189 | """
|
129 | 190 | Get today's snapshot of Brain Image Library.
|
@@ -530,7 +591,17 @@ def __get_cnbtaxonomy(df):
|
530 | 591 |
|
def __get_genotypes(df):
    """
    Return the distinct values of the 'genotype' column.

    Parameters:
        df (pandas.DataFrame): The input DataFrame; it must contain a 'genotype' column.

    Returns:
        The result of calling 'unique()' on the 'genotype' column, i.e. each distinct genotype
        once, in order of first appearance.
    """
    genotype_column = df["genotype"]
    return genotype_column.unique()
|
536 | 607 |
|
|
0 commit comments