Skip to content

Commit 1bcd457

Browse files
Merge pull request #2 from neuroprismlab/abcd_dataset_code
Abcd dataset code
2 parents 487b3b7 + b1de387 commit 1bcd457

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+779
-255
lines changed

NBS_addon/NBSedge_level_parametric_corr.m

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,20 @@
6767
GLM = NBSglm_setup_smn(GLM);
6868
edge_stats__target = NBSglm_smn(GLM);
6969

70+
% If gt - we only need the result from the glm and the cluster stats
71+
if STATS.ground_truth
72+
% Get the shape of edge_stats__target
73+
shape = size(edge_stats__target);
74+
75+
% Create variables with the same shape
76+
con_mat = {false(shape)}; % Empty logical array in a cell
77+
pval = {NaN(shape)}; % NaN array inside a cell
78+
any_significant = false; % Ensures downstream code doesn't break
79+
80+
return;
81+
end
82+
83+
7084
if strcmp(GLM.test,'onesample')
7185
% Calculate degrees of freedom for one-sample t-test
7286
df = GLM.n_observations - 1;

NBS_addon/NBSrun_smn.m

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,12 @@
402402
% Number of nodes
403403
nbs.STATS.N = DIMS.nodes;
404404

405+
try
406+
nbs.STATS.ground_truth = logical(UI.ground_truth);
407+
catch
408+
error('Error setting up gt value')
409+
end
410+
405411
% Do error checking on user inputs
406412
[msg,stop] = errorcheck(UI,DIMS,S);
407413

@@ -467,6 +473,11 @@
467473
end
468474

469475

476+
% If gt change the method
477+
% We only need the GLM result for the gt
478+
479+
480+
470481
%Do NBS
471482
if strcmp(UI.method.ui, 'Run NBS')
472483

NBS_addon/NBSstats_smn.m

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
function [any_significant, con_mat, pval, edge_stats__target, cluster_stats__target] = NBSstats_smn(varargin)
1+
function [any_significant, con_mat, pval, edge_stats__target, cluster_stats__target] = ...
2+
NBSstats_smn(varargin)
23
%NBSstats Computes network components among edges that survive a primary
34
%test statistic threshold. Assigns a corrected p-value to each component
45
%using permuted data that has been supplied as part of the structure STATS.
@@ -187,6 +188,25 @@
187188
edge_stats__target = get_univariate_stats(STATS,GLM,precomputed,1);
188189
[cluster_stats__target, max_stat__target] = get_cluster_stats(edge_stats__target,STATS,ind_upper, N, Intensity,bgl);
189190

191+
% If gt - we only need the result from the glm and the cluster stats
192+
if STATS.ground_truth
193+
% Get the shape of cluster_stats__target
194+
shape = size(cluster_stats__target);
195+
196+
% Create variables with the same shape
197+
con_mat = false(shape); % Logical array (not inside a cell)
198+
199+
% Check if Omnibus method
200+
if strcmp(STATS.statistic_type, 'Omnibus')
201+
pval = 1; % Scalar p-value
202+
else
203+
pval = NaN(shape); % NaN array (not inside a cell)
204+
end
205+
206+
any_significant = false; % Ensures downstream code doesn't break
207+
return;
208+
end
209+
190210
% 2, Permuted cluster statistics
191211
%First row of test_stat is the observed test statistics, so start at the second row
192212
for i=2:K+1

NBS_benchmarking/setup_benchmarking.m

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,14 @@
8585
UI.mask.ui = RepParams.mask;
8686
% UI.do_Constrained_FWER_second_level.ui=do_Constrained_FWER_second_level;
8787

88+
% Ground truth calculations do not require p-values
89+
UI.ground_truth = RepParams.ground_truth;
90+
8891
%% Set up DIMS
8992
UI.DIMS.nodes = RepParams.n_nodes;
9093
UI.DIMS.observations = RepParams.n_subs_subset;
9194
UI.DIMS.predictors = size(RepParams.X_rep, 2);
9295

96+
9397
end
9498

NBS_benchmarking/summarize_tprs.m

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,16 @@
5656
% addOptional(p,'summary_type',summary_type_default);
5757
addRequired(p,'summary_type');
5858
addOptional(p,'tpr_dthresh', 0);
59-
addOptional(p,'save_directory', './power_calculation_results/')
59+
addOptional(p,'save_directory', NaN)
6060
parse(p, summary_type, varargin{:});
6161

6262
% summary_type=p.Results.summary_type;
6363
tpr_dthresh = p.Results.tpr_dthresh;
64-
save_directory = [p.Results.save_directory, '/power_calculation/'];
64+
save_directory = p.Results.save_directory;
65+
66+
if isnan(save_directory)
67+
error('Save directory not specified')
68+
end
6569

6670
% Make save directory if it does not exist
6771

@@ -104,7 +108,7 @@
104108

105109
%% Check if already calculated and get file_name
106110
data_set_name = strcat(rep_data.meta_data.dataset, '_', rep_data.meta_data.map);
107-
test_components = strcat(rep_data.meta_data.test_components{1}, '_', rep_data.meta_data.test_components{2});
111+
test_components = get_test_components_from_meta_data(rep_data.meta_data.test_components);
108112
[~, file_name] = create_and_check_rep_file(save_directory, data_set_name, test_components, ...
109113
rep_data.meta_data.test, rep_data.meta_data.test_type, ...
110114
rep_data.meta_data.omnibus, rep_data.meta_data.subject_number, ...

NBS_benchmarking/support_scripts/atlas_scripts/load_atlas_edge_groups.m

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function edge_groups=load_atlas_edge_groups(n_nodes, mapping_category, atlas_file)
1+
function edge_groups=load_atlas_edge_groups(atlas_file)
22

33
% returns a n_nodes x n_nodes matrix where the value of each edge reflects
44
% the network pair it belongs to
0 Bytes
Binary file not shown.

calculate_gt.m

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,34 +22,27 @@
2222
clear(vars{:}); % Clear all other variables
2323
clc;
2424

25-
data_dir = '/Users/f.cravogomes/Desktop/Cloned Repos/NBS_Calculator/data/s_hcp_fc_noble_tasks.mat';
26-
if ~exist('Dataset', 'var')
27-
Dataset = load(data_dir);
28-
else
29-
% disp('Data already loaded')
30-
end
31-
% Extract dataset name
32-
[~, data_set_name, ~] = fileparts(data_dir);
33-
3425
[current_path, ~, ~] = fileparts(mfilename('fullpath')); % assuming NBS_benchmarking is current folder
3526
addpath(genpath(current_path));
3627

3728
%% Prepare parameters and dataset
3829
Params = setparams();
30+
Params.ground_truth = true;
3931

32+
%% Load data
4033
if ~exist('Dataset', 'var')
4134
Dataset = load(Params.data_dir);
42-
else
43-
% disp('Data already loaded')
4435
end
4536

4637
%% Set n_nodes, n_var, n_repetitions
4738
Params = setup_experiment_data(Params, Dataset);
4839

49-
%% Create directory and get dataset name
50-
Params.save_directory = [Params.save_directory, 'ground_truth/'];
51-
Params = create_output_directory(Params);
52-
Params.data_set = get_data_set_name(Dataset);
40+
%% Create directory and get dataset name - get atlas
41+
[Params.data_set, Params.data_set_base, Params.data_set_map] = get_data_set_name(Dataset);
42+
Params.atlas_file = atlas_data_set_map(Params);
43+
44+
%% Create gt output directory
45+
Params = create_gt_output_directory(Params);
5346

5447
%% Paralle workers
5548
setup_parallel_workers(Params.parallel, Params.n_workers);
@@ -66,23 +59,30 @@
6659
RP = Params;
6760
RP.test_name = t;
6861

69-
RP = infer_test_from_data(RP, OutcomeData.(t), BrainData);
62+
[RP, test_type_origin] = infer_test_from_data(RP, OutcomeData.(t), BrainData);
63+
64+
% Important, X is calculated here for all test_types
65+
% However, only the r test type will use this test
66+
switch test_type_origin
67+
68+
% Here: RP.test_name, RP.n_subs_1, RP.n_subs_2, RP.n_subs
69+
case 'score_cond'
70+
[X, Y , RP] = subs_data_from_score_condition(RP, OutcomeData.(t), BrainData, t);
71+
72+
case 'contrast'
73+
[X, Y , RP] = subs_data_from_contrast(RP, OutcomeData.(t).contrast, BrainData);
74+
75+
otherwise
76+
error('Test type origin not found')
77+
78+
end
7079

71-
% y_and_x also extracts subject number and sub_ids
72-
% Encapsulate this level of setup in a function if there is to much
73-
% function calls here
74-
[~, Y , RP] = subs_data_from_contrast(RP, OutcomeData.(t).contrast, BrainData);
7580
[RP.triumask, RP.trilmask] = create_masks_from_nodes(size(RP.mask, 1));
7681

7782
% Sets subject repetition to all subjects and others
7883
RP = setup_ground_truth_parameters(RP);
7984

80-
run_benchmarking(RP, Y)
81-
82-
return;
83-
if RP.testing == 1 && ti == 2
84-
return;
85-
end
85+
run_benchmarking(RP, Y, X)
8686

8787
end
8888

calculate_power.m

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,49 @@
1-
%% Questions
2-
% There appears to be something wrong in the network level power
3-
% calculations - edge level and whole brain are fine
4-
% I likely made an error in the atlas
5-
% Cluster has network-based stats not edge
6-
% Found it - Likely problem in edge groups!
7-
%
8-
%% Potential fix - More resonable results!
9-
% Used tril mask on extract_atlas_related_parameters for edge groups
10-
% Removed atlas reordering in setupbenchmarking
11-
12-
% Initial setup
1+
%% Initial setup
132
scriptDir = fileparts(mfilename('fullpath'));
143
addpath(genpath(scriptDir));
154
cd(scriptDir);
16-
vars = who; % Get a list of all variable names in the workspace
17-
vars(strcmp(vars, 'RepData')) = []; % Remove the variable you want to keep from the list
18-
vars(strcmp(vars, 'GtData')) = [];
19-
clear(vars{:}); % Clear all other variables
5+
clearvars -except RepData GtData;
206
clc;
217

22-
%% Directory to save and find rep data - TODO add them all
8+
%% Directory to save and find rep data
239
Params = setparams();
2410

25-
%% Create storage directory - only if it does not exist
26-
if ~exist(Params.save_directory, 'dir') % Check if the directory does not exist
27-
mkdir(Params.save_directory); % Create the directory
11+
% Load dataset information
12+
if ~exist('Dataset', 'var')
13+
Study_Info = load(Params.data_dir, 'study_info');
2814
end
15+
[Params.data_set, ~, ~] = get_data_set_name(Study_Info);
16+
17+
%% Process each repetition file one by one to reduce memory usage
18+
rep_files = dir(fullfile(Params.save_directory, Params.data_set, '*.mat'));
2919

30-
if ~exist('RepData', 'var') || ~exist('GtData', 'var')
31-
[GtData, RepData] = load_rep_and_gt_results(Params, 'gt_origin', Params.gt_origin);
32-
end
20+
%% Create output directory (only if it doesn't exist)
21+
Params = create_power_output_directory(Params);
3322

34-
power_calculation_tprs = @(x) summarize_tprs('calculate_tpr', x, GtData, ...
35-
'save_directory', Params.save_directory);
36-
dfs_struct(power_calculation_tprs, RepData);
23+
for i = 1:length(rep_files)
24+
% Load a single repetition data file
25+
rep_file_path = fullfile(rep_files(i).folder, rep_files(i).name);
26+
rep_data = load(rep_file_path);
27+
28+
% Extract metadata to find corresponding GT file
29+
if isfield(rep_data, 'meta_data') && isfield(rep_data.meta_data, 'test_components')
30+
gt_filename = construct_gt_filename(rep_data.meta_data);
31+
gt_file_path = [Params.gt_data_dir, Params.data_set, '/', gt_filename];
32+
33+
% Load only the necessary GT data
34+
if exist(gt_file_path, 'file')
35+
gt_data = load(gt_file_path);
36+
else
37+
warning('GT file %s not found. Skipping...', gt_filename);
38+
continue;
39+
end
3740

41+
% Compute power using the extracted repetition and GT data
42+
summarize_tprs('calculate_tpr', rep_data, gt_data, 'save_directory', Params.save_directory);
43+
else
44+
warning('Metadata missing in %s, skipping...', rep_files(i).name);
45+
end
46+
47+
% Free memory before next iteration
48+
clear rep_data gt_data;
49+
end

calculate_power_old.m

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
%% Questions
2+
% There appears to be something wrong in the network level power
3+
% calculations - edge level and whole brain are fine
4+
% I likely made an error in the atlas
5+
% Cluster has network-based stats not edge
6+
% Found it - Likely problem in edge groups!
7+
%
8+
%% Potential fix - More resonable results!
9+
% Used tril mask on extract_atlas_related_parameters for edge groups
10+
% Removed atlas reordering in setupbenchmarking
11+
12+
% Initial setup
13+
scriptDir = fileparts(mfilename('fullpath'));
14+
addpath(genpath(scriptDir));
15+
cd(scriptDir);
16+
vars = who; % Get a list of all variable names in the workspace
17+
vars(strcmp(vars, 'RepData')) = []; % Remove the variable you want to keep from the list
18+
vars(strcmp(vars, 'GtData')) = [];
19+
clear(vars{:}); % Clear all other variables
20+
clc;
21+
22+
%% Directory to save and find rep data - TODO add them all
23+
Params = setparams();
24+
25+
% Load dataset
26+
if ~exist('Dataset', 'var')
27+
Study_Info = load(Params.data_dir, 'study_info');
28+
end
29+
[Params.data_set, ~, ~] = get_data_set_name(Study_Info);
30+
31+
if ~exist('RepData', 'var') || ~exist('GtData', 'var')
32+
[GtData, RepData] = load_rep_and_gt_results(Params, 'gt_origin', Params.gt_origin, ...
33+
'dataset', Params.data_set);
34+
end
35+
36+
%% Create storage directory - only if it does not exist
37+
% the previous directory is used to load the data
38+
Params = create_power_output_directory(Params);
39+
40+
power_calculation_tprs = @(x) summarize_tprs('calculate_tpr', x, GtData, ...
41+
'save_directory', Params.save_directory);
42+
dfs_struct(power_calculation_tprs, RepData);
43+

0 commit comments

Comments
 (0)