|
| 1 | +import os |
| 2 | +import numpy as np |
| 3 | +import pandas as pd |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import seaborn as sns |
| 6 | + |
| 7 | + |
class Group:
    """Aggregates per-subject behavioral task CSVs from a nested study tree.

    Expected on-disk layout (relative to ``root_dir``)::

        <root_dir>/<study>/<site>/<subject>/<task_name>/data/*.csv
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def _iter_task_csvs(task_name, root_dir):
        """Yield ``(subject_id, csv_path)`` for each matching task CSV.

        Walks root_dir/<study>/<site>/<subject>/<task_name>/data and yields
        only session-1 files: names ending in '2.csv' are skipped first
        (session-2 files), then only '.csv' files containing '_cat-1_' match.
        """
        for study in os.listdir(root_dir):
            study_path = os.path.join(root_dir, study)
            if not os.path.isdir(study_path):
                continue
            for site in os.listdir(study_path):
                site_path = os.path.join(study_path, site)
                if not os.path.isdir(site_path):
                    continue
                for subject in os.listdir(site_path):
                    # If data_folder is a directory, all of its parents are
                    # too, so a single isdir check covers the whole chain.
                    data_folder = os.path.join(
                        site_path, subject, task_name, "data"
                    )
                    if not os.path.isdir(data_folder):
                        continue
                    for file in os.listdir(data_folder):
                        # '2.csv' suffix marks a session-2 file: reject it
                        # before the cat-1 test so a session-2 cat-1 file is
                        # never picked up.
                        if file.endswith("2.csv"):
                            continue
                        if file.endswith(".csv") and "_cat-1_" in file:
                            yield subject, os.path.join(data_folder, file)

    @staticmethod
    def load_task_data(task_name, relevant_columns, root_dir="../../../data"):
        """
        Loads all subject CSVs corresponding to a specified task, extracts relevant columns,
        and appends them (with the subject ID as the first column) into one DataFrame.

        Parameters:
          task_name (str): The name of the task folder (e.g., "AF", "NTS", "DWL").
          relevant_columns (list): A list of column names (strings) to extract from each CSV.
          root_dir (str): The root data directory. Default is "../../../data".

        Returns:
          pandas.DataFrame: A DataFrame containing the subject ID (as 'subjectID') and the
          relevant columns from every CSV that was found. If no files match, an empty
          DataFrame with the expected columns is returned.
        """
        data_frames = []

        for subject, csv_path in Group._iter_task_csvs(task_name, root_dir):
            print(f"Processing {csv_path}")
            try:
                temp_df = pd.read_csv(csv_path)
                # Files missing a requested column raise KeyError here and
                # are reported + skipped rather than aborting the whole run.
                filtered_df = temp_df[relevant_columns].copy()
            except Exception as e:
                print(f"Error processing {csv_path}: {e}")
                continue
            # Subject ID (directory name) goes in as the first column.
            filtered_df.insert(0, "subjectID", subject)
            data_frames.append(filtered_df)

        if data_frames:
            return pd.concat(data_frames, ignore_index=True)
        # No data found: empty frame with the appropriate column layout.
        return pd.DataFrame(columns=["subjectID"] + relevant_columns)

    def return_dfs(self):
        """Placeholder — not yet implemented."""
        pass
0 commit comments