Skip to content

Commit a86406e

Browse files
committed
adding create_json for new js app
1 parent efba109 commit a86406e

File tree

5 files changed

+5220
-1726
lines changed

5 files changed

+5220
-1726
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import os
2+
import pandas as pd
3+
import numpy as np
4+
5+
6+
7+
def create_json(data_folder, output_path='../../data/data.json'):
    """
    Build a master dictionary of all tasks grouped by Subject ID at
    application startup, and write it to a JSON file.

    Parameters:
        data_folder (str): Root data directory containing the 'int' and 'obs'
            project folders, each laid out as <site>/<subject>/<task>.
        output_path (str): Destination file for the generated JSON.
            Defaults to '../../data/data.json' for backward compatibility.

    Returns:
        dict: The master data structure, also written to *output_path*:
            subject_id: {
                'site': str,
                'project': str,        # 'int' or 'obs'
                'tasks': {
                    task_name: {
                        'date': str or None,
                        'category': str or None,
                        'png_paths': list,
                        'session': str or None
                    }
                }
            }
    """
    import json  # local import, as in the original; keeps the block self-contained

    directories = ['int', 'obs']
    master_data = {}

    for directory in directories:
        dir_path = os.path.join(data_folder, directory)
        # Guard: skip a project folder that does not exist instead of
        # letting os.listdir raise FileNotFoundError.
        if not os.path.isdir(dir_path):
            continue

        for site in os.listdir(dir_path):  # site folders (e.g., UI, NE)
            site_path = os.path.join(dir_path, site)
            if not os.path.isdir(site_path):
                continue

            for subject_id in os.listdir(site_path):  # subject folders (e.g., 8006, 9002)
                subject_path = os.path.join(site_path, subject_id)
                if not os.path.isdir(subject_path):
                    continue

                # First occurrence wins: a subject found under 'int' keeps
                # that site/project tag even if it also appears under 'obs'.
                if subject_id not in master_data:
                    master_data[subject_id] = {
                        'site': site,
                        'project': directory,
                        'tasks': {}
                    }

                for task_name in os.listdir(subject_path):  # task folders (e.g., AF, DSST)
                    task_path = os.path.join(subject_path, task_name)
                    if not os.path.isdir(task_path):
                        continue

                    plots_path = os.path.join(task_path, 'plot')
                    data_path = os.path.join(task_path, 'data')

                    tasks = master_data[subject_id]['tasks']
                    if task_name not in tasks:
                        tasks[task_name] = {
                            'date': None,
                            'category': None,
                            'png_paths': [],
                            'session': None
                        }

                    # Extract date, category and session from the first CSV in
                    # the data directory. Guard: a task may have no 'data'
                    # folder — the original crashed on os.listdir here.
                    if os.path.isdir(data_path):
                        csv_files = [
                            file for file in os.listdir(data_path)
                            if file.endswith('.csv')
                        ]
                        if csv_files:
                            csv_filename = csv_files[0]
                            df = pd.read_csv(os.path.join(data_path, csv_filename))

                            # First value of the 'datetime' column, if present.
                            # NOTE(review): assumes the CSV stores this as a
                            # string; a numpy scalar would not JSON-serialize.
                            if 'datetime' in df.columns:
                                date_value = df['datetime'].iloc[0]
                            else:
                                date_value = None

                            # Filename convention: ..._ses-<S>_cat-<C>.csv
                            category_value = csv_filename.split('_')[-1].replace('.csv', '').replace('cat-', '')
                            session_value = csv_filename.split('_')[-2].replace('ses-', '')

                            tasks[task_name]['date'] = date_value
                            tasks[task_name]['category'] = category_value
                            tasks[task_name]['session'] = session_value

                            # Release the DataFrame promptly.
                            del df

                    # Collect PNG file paths from the plot directory.
                    if os.path.exists(plots_path):
                        png_files = [
                            os.path.join(plots_path, png)
                            for png in os.listdir(plots_path)
                            if png.endswith('.png')
                        ]
                        tasks[task_name]['png_paths'].extend(png_files)

    with open(output_path, 'w') as f:
        json.dump(master_data, f, indent=2)

    # The docstring always promised a dictionary; return it (the original
    # returned None, contradicting its own documented output).
    return master_data

code/group/group.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import os
2+
import numpy as np
3+
import pandas as pd
4+
import matplotlib.pyplot as plt
5+
import seaborn as sns
6+
7+
8+
class Group:
    """Namespace for group-level data aggregation across subjects."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def load_task_data(task_name, relevant_columns, root_dir="../../../data"):
        """
        Load all subject CSVs for a specified task, extract the relevant
        columns, and append them (with the subject ID as the first column)
        into one DataFrame.

        Parameters:
            task_name (str): The name of the task folder (e.g., "AF", "NTS", "DWL").
            relevant_columns (list): Column names (strings) to extract from each CSV.
            root_dir (str): The root data directory, laid out as
                <study>/<site>/<subject>/<task>/data/*.csv.

        Returns:
            pandas.DataFrame: The subject ID (as 'subjectID') plus the relevant
            columns from every CSV found. If no data is found, an empty
            DataFrame with ['subjectID'] + relevant_columns as columns.
        """
        data_frames = []

        # Guard-clause traversal of study/site/subject replaces the original
        # six-level nested-if pyramid; behavior is identical.
        for study in os.listdir(root_dir):
            study_path = os.path.join(root_dir, study)
            if not os.path.isdir(study_path):
                continue
            for site in os.listdir(study_path):
                site_path = os.path.join(study_path, site)
                if not os.path.isdir(site_path):
                    continue
                for subject in os.listdir(site_path):
                    subject_path = os.path.join(site_path, subject)
                    if not os.path.isdir(subject_path):
                        continue
                    data_folder = os.path.join(subject_path, task_name, "data")
                    if not os.path.isdir(data_folder):
                        continue

                    for file in os.listdir(data_folder):
                        # Skip files ending in '2.csv' (presumably session 2
                        # — TODO confirm the filename convention).
                        if file.endswith("2.csv"):
                            continue
                        # Only process category-1 CSVs.
                        if not (file.endswith(".csv") and "_cat-1_" in file):
                            continue

                        csv_path = os.path.join(data_folder, file)
                        print(f"Processing {csv_path}")
                        try:
                            temp_df = pd.read_csv(csv_path)
                            # Select only the relevant columns; raises KeyError
                            # (caught below) if a file lacks one of them.
                            filtered_df = temp_df[relevant_columns].copy()
                            # Insert the subject ID as the first column.
                            filtered_df.insert(0, "subjectID", subject)
                            data_frames.append(filtered_df)
                        except Exception as e:
                            # Best-effort: report and skip unreadable/mismatched
                            # files. BUGFIX: the original also ran
                            # `del temp_df; del filtered_df` in a finally block,
                            # which raised NameError on unbound names whenever
                            # read_csv or the column selection failed, crashing
                            # the whole load after the error had been handled.
                            print(f"Error processing {csv_path}: {e}")

        # Concatenate all subject DataFrames into one, resetting the index;
        # fall back to an empty frame with the expected columns.
        if data_frames:
            final_df = pd.concat(data_frames, ignore_index=True)
        else:
            final_df = pd.DataFrame(columns=["subjectID"] + relevant_columns)

        return final_df

    def return_dfs(self):
        # Placeholder — not yet implemented in the original.
        pass
87+

0 commit comments

Comments
 (0)