Skip to content

Commit ad0264f

Browse files
authored
Merge pull request #100 from openproblems-bio/jalil
grn evaluation pipeline is using dataset specific metrics
2 parents 69cc314 + 427e5a9 commit ad0264f

File tree

29 files changed

+397
-536
lines changed

29 files changed

+397
-536
lines changed
30.6 KB
Loading

scripts/run_all.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ datasets=('replogle' 'op') #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xair
55
run_local=false # set to true to run locally, false to run on AWS
66

77
run_grn_inference=false
8-
run_grn_evaluation=false
9-
run_download=true
8+
run_grn_evaluation=true
9+
run_download=false
1010

1111

1212
for dataset in "${datasets[@]}"; do

scripts/run_grn_evaluation.sh

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ fi
6565

6666

6767
num_workers=10
68-
metric_ids="[all_metrics]" # regression, ws_distance, sem, tf_recovery, replica_consistency
6968
RUN_ID="${DATASET}_evaluation"
7069
models_folder="${DATASET}/"
7170
apply_tf=true
@@ -102,6 +101,11 @@ param_aws="s3://openproblems-data/resources/grn/results/params/${RUN_ID}_param_l
102101
> "$param_local"
103102
> "$param_file"
104103

104+
# Generate and source config file
105+
echo "Generating dataset configuration..."
106+
python src/utils/config.py --output src/utils/dataset_config.env
107+
source src/utils/dataset_config.env
108+
105109
if [ "$RUN_LOCAL" = true ]; then
106110
cat >> "$param_local" << HERE
107111
param_list:
@@ -112,6 +116,16 @@ append_entry() {
112116
local grn_name="$1"
113117
local prediction="$2"
114118
local dataset="$3"
119+
120+
# Get cell type and metrics from sourced env variables
121+
cell_type_var="CELLTYPE_${dataset}"
122+
metrics_var="METRICS_${dataset}"
123+
124+
cell_type="${!cell_type_var}"
125+
metric_ids="[${!metrics_var}]"
126+
127+
echo ${dataset} ${cell_type} ${metric_ids}
128+
115129
if [[ "$dataset" =~ ^(norman|nakatake|adamson)$ ]]; then
116130
layer_='X_norm'
117131
else
@@ -127,14 +141,22 @@ append_entry() {
127141
apply_tf: ${apply_tf}
128142
reg_type: ${reg_type}
129143
layer: $layer_
130-
144+
131145
HERE
132146
# Additional fields for specific datasets
133147
if [[ "$dataset" =~ ^(norman|replogle|adamson|xaira_HCT116|xaira_HEK293T)$ ]]; then
134148
cat >> "$param_local" << HERE
135149
ws_consensus: ${resources_dir}/grn_benchmark/prior/ws_consensus_${dataset}.csv
136150
ws_distance_background: ${resources_dir}/grn_benchmark/prior/ws_distance_background_${dataset}.csv
137151
evaluation_data_de: ${resources_dir}/grn_benchmark/evaluation_data/${dataset}_de.h5ad
152+
HERE
153+
fi
154+
155+
if [[ "$dataset" != "nakatake" ]]; then
156+
cat >> "$param_local" << HERE
157+
ground_truth_unibind: ${resources_dir}/grn_benchmark/ground_truth/${cell_type}_unibind.csv
158+
ground_truth_chipatlas: ${resources_dir}/grn_benchmark/ground_truth/${cell_type}_chipatlas.csv
159+
ground_truth_remap: ${resources_dir}/grn_benchmark/ground_truth/${cell_type}_remap.csv
138160
HERE
139161
fi
140162
}

src/api/comp_metric.yaml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,26 @@ arguments:
5959
direction: input
6060
default: ridge
6161
description: name of regression to use
62-
- name: --ground_truth
62+
- name: --ground_truth_unibind
6363
type: file
6464
direction: input
6565
must_exist: false
6666
required: false
6767
example: resources_test/grn_benchmark/ground_truth/PBMC.csv
68+
- name: --ground_truth_chipatlas
69+
type: file
70+
direction: input
71+
must_exist: false
72+
required: false
73+
example: resources_test/grn_benchmark/ground_truth/PBMC.csv
74+
- name: --ground_truth_remap
75+
type: file
76+
direction: input
77+
must_exist: false
78+
required: false
79+
example: resources_test/grn_benchmark/ground_truth/PBMC.csv
80+
81+
6882
- name: --ws_consensus
6983
type: file
7084
direction: input

src/metrics/all_metrics/config.vsh.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,8 @@ resources:
2424
dest: tf_binding_helper.py
2525
- path: /src/metrics/replica_consistency/helper.py
2626
dest: replica_consistency_helper.py
27-
- path: /src/utils/dataset_config.py
28-
dest: dataset_config.py
29-
- path: /src/metrics/metrics_config.py
30-
dest: metrics_config.py
27+
- path: /src/utils/config.py
28+
dest: config.py
3129

3230

3331

src/metrics/all_metrics/helper.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,33 +40,33 @@
4040
except:
4141
from replica_consistency.helper import main as main_replica_consistency
4242

43-
from metrics_config import datasets_metrics
43+
from config import DATASETS_METRICS
4444

4545

4646
def sem_metric(par, dataset_id):
47-
if dataset_id in datasets_metrics:
48-
if 'sem' in datasets_metrics[dataset_id]:
47+
if dataset_id in DATASETS_METRICS:
48+
if 'sem' in DATASETS_METRICS[dataset_id]:
4949
output = main_sem(par)
5050
return output
5151
return None
5252

5353
def tf_rec_metric(par, dataset_id):
54-
if dataset_id in datasets_metrics:
55-
if 'tf_recovery' in datasets_metrics[dataset_id]:
54+
if dataset_id in DATASETS_METRICS:
55+
if 'tf_recovery' in DATASETS_METRICS[dataset_id]:
5656
output = main_tf_rec(par)
5757
return output
5858
return None
5959

6060
def tf_binding_metric(par, dataset_id):
61-
if dataset_id in datasets_metrics:
62-
if 'tf_binding' in datasets_metrics[dataset_id]:
61+
if dataset_id in DATASETS_METRICS:
62+
if 'tf_binding' in DATASETS_METRICS[dataset_id]:
6363
output = main_tf_binding(par)
6464
return output
6565
return None
6666

6767
def replica_consistency_metric(par, dataset_id):
68-
if dataset_id in datasets_metrics:
69-
if 'replica_consistency' in datasets_metrics[dataset_id]:
68+
if dataset_id in DATASETS_METRICS:
69+
if 'replica_consistency' in DATASETS_METRICS[dataset_id]:
7070
try:
7171
output = main_replica_consistency(par)
7272
except:
@@ -75,15 +75,15 @@ def replica_consistency_metric(par, dataset_id):
7575
return None
7676

7777
def reg2_metric(par, dataset_id):
78-
if dataset_id in datasets_metrics:
79-
if 'regression' in datasets_metrics[dataset_id]:
78+
if dataset_id in DATASETS_METRICS:
79+
if 'regression' in DATASETS_METRICS[dataset_id]:
8080
output = main_reg(par)
8181
return output
8282
return None
8383

8484
def ws_distance_metric(par, dataset_id):
85-
if dataset_id in datasets_metrics:
86-
if 'ws_distance' in datasets_metrics[dataset_id]:
85+
if dataset_id in DATASETS_METRICS:
86+
if 'ws_distance' in DATASETS_METRICS[dataset_id]:
8787
_, output = main_ws_distance(par)
8888
return output
8989
return None

src/metrics/experimental/anchor_regression/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
np.random.seed(seed)
2323

2424
from util import read_prediction, manage_layer
25-
from dataset_config import DATASET_GROUPS
25+
from config import DATASET_GROUPS
2626

2727

2828
def encode_obs_cols(adata, cols):

src/metrics/experimental/regression_3/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
np.random.seed(seed)
2626

2727
from util import read_prediction, manage_layer
28-
from dataset_config import DATASET_GROUPS
28+
from config import DATASET_GROUPS
2929

3030

3131
def encode_obs_cols(adata, cols):

src/metrics/experimental/replica_consistency/script.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
from helper import main
2626
from util import format_save_score, parse_args
27-
# from dataset_config import DATASET_GROUPS
27+
# from config import DATASET_GROUPS
2828

2929
par = parse_args(par)
3030

src/metrics/experimental/vc/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
torch.use_deterministic_algorithms(True)
3333

3434
from util import read_prediction, manage_layer
35-
from dataset_config import DATASET_GROUPS
35+
from config import DATASET_GROUPS
3636
from scipy.spatial.distance import cityblock
3737

3838

0 commit comments

Comments
 (0)