
Commit d57df34

Merge pull request #101 from openproblems-bio/jalil
stable metrics are selected
2 parents ad0264f + 23168dc

File tree

23 files changed: +468, -212 lines changed
Two binary files changed (72.3 KB and 236 KB); contents not shown.

scripts/process_data/rest.sh

Lines changed: 14 additions & 14 deletions
@@ -4,28 +4,28 @@
 #SBATCH --error=logs/%j.err
 #SBATCH --ntasks=1
 #SBATCH --cpus-per-task=10
-#SBATCH --time=10:00:00
-#SBATCH --mem=1000GB
+#SBATCH --time=20:00:00
+#SBATCH --mem=500GB
 #SBATCH --partition=cpu
 #SBATCH --mail-type=END,FAIL
 #SBATCH --mail-user=jalil.nourisa@gmail.com


 set -e

-python src/process_data/adamson/script.py
-python src/process_data/nakatake/script.py
-python src/process_data/norman/script.py
+# python src/process_data/main/adamson/script.py
+# python src/process_data/main/nakatake/script.py
+# python src/process_data/main/norman/script.py

-echo "Processing opsca"
-python src/process_data/opsca/script.py
+# echo "Processing opsca"
+# python src/process_data/main/opsca/script.py
 echo "Processing replogle"
-python src/process_data/replogle/script.py #--run_test #--run_test
-echo "Processing xaira"
-python src/process_data/xaira/script.py #--run_test
+# python src/process_data/main/replogle/script.py #--run_test #--run_test
+# echo "Processing xaira"
+python src/process_data/main/xaira/script.py #--run_test


-echo "Processing 300BCG"
-python src/process_data/300BCG/script.py
-echo "Processing IBD"
-python src/process_data/ibd/script.py
+# echo "Processing 300BCG"
+# python src/process_data/main/300BCG/script.py
+# echo "Processing IBD"
+# python src/process_data/main/ibd/script.py

scripts/run_all.sh

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 set -e

-datasets=('replogle' 'op') #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd' '300BCG') #
+datasets=( 'xaira_HEK293T' 'xaira_HCT116' ) #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd' '300BCG') #

 run_local=false # set to true to run locally, false to run on AWS

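Note: the run_all.sh change narrows the active datasets to the two xaira entries; the trailing comment keeps the full list for re-enabling later. A minimal sketch of how an array like this typically drives per-dataset runs follows; the loop body is illustrative only and is not part of this diff.

    #!/usr/bin/env bash
    set -e

    # Active datasets; names from the commented list in run_all.sh can be added back here.
    datasets=( 'xaira_HEK293T' 'xaira_HCT116' )

    for dataset in "${datasets[@]}"; do
        echo "Running pipeline for ${dataset}"
        # placeholder for the actual per-dataset command in run_all.sh (not shown in this diff)
    done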
scripts/run_grn_evaluation.sh

Lines changed: 1 addition & 0 deletions
@@ -106,6 +106,7 @@ echo "Generating dataset configuration..."
 python src/utils/config.py --output src/utils/dataset_config.env
 source src/utils/dataset_config.env

+
 if [ "$RUN_LOCAL" = true ]; then
     cat >> "$param_local" << HERE
 param_list:

src/methods/pearson_corr/script.py

Lines changed: 3 additions & 1 deletion
@@ -10,7 +10,7 @@

 ## VIASH START
 par = {
-    'rna': 'resources/grn_benchmark/inference_data//op_rna.h5ad',
+    'rna': 'resources/grn_benchmark/inference_data//replogle_rna.h5ad',
     'tf_all': 'resources/grn_benchmark/prior/tf_all.csv',
     'cell_type_specific': False,
     'max_n_links': 50000,
@@ -40,6 +40,8 @@ def main(par):
     print('Output GRN')
     print('Shape of the network:', net.shape)
     print(net.sort_values('weight', ascending=False, key=abs).head(10))
+    print(net['source'].nunique(), 'TFs')
+    print('Num neg signs?:', (net['weight'] < 0).sum())
     net = net.astype(str)
     output = ad.AnnData(
         X=None,

src/metrics/all_metrics/helper.py

Lines changed: 5 additions & 3 deletions
@@ -74,10 +74,10 @@ def replica_consistency_metric(par, dataset_id):
             return output
     return None

-def reg2_metric(par, dataset_id):
+def reg_metric(par, dataset_id):
     if dataset_id in DATASETS_METRICS:
         if 'regression' in DATASETS_METRICS[dataset_id]:
-            output = main_reg(par)
+            _ , output = main_reg(par)
             return output
     return None

@@ -92,7 +92,9 @@
 def main(par):
     dataset_id = ad.read_h5ad(par['evaluation_data'], backed='r').uns['dataset_id']
     rr_store = []
-    metrics = [reg2_metric, ws_distance_metric, sem_metric, tf_rec_metric, replica_consistency_metric]
+    metrics = [reg_metric, ws_distance_metric, sem_metric, tf_rec_metric, replica_consistency_metric, tf_binding_metric]
+    # metrics = [tf_binding_metric, sem_metric, replica_consistency_metric]
+
     for metric in metrics:
         print(f"Computing metric: {metric.__name__}")
         rr = metric(par, dataset_id)

src/metrics/all_metrics/run_local.sh

Lines changed: 14 additions & 5 deletions
@@ -16,6 +16,9 @@ set -euo pipefail
 layer="lognorm"
 reg_type="ridge"
 num_workers=20
+prediction="output/net.h5ad"
+score="output/score.h5ad"
+dataset="op"

 while [[ $# -gt 0 ]]; do
     case "$1" in
@@ -53,12 +53,15 @@ if [[ -n "$layer" ]]; then
     echo "Layer is set to: $layer"
 fi

-# Check required args
-if [[ -z "${dataset:-}" || -z "${prediction:-}" || -z "${score:-}" ]]; then
-    echo "Usage: $0 --dataset <name> --prediction <file> --score <metric>"
-    exit 1
-fi
+# # Check required args
+# if [[ -z "${dataset:-}" || -z "${prediction:-}" || -z "${score:-}" ]]; then
+#     echo "Usage: $0 --dataset <name> --prediction <file> --score <metric>"
+#     exit 1
+# fi

+source src/utils/dataset_config.env
+cell_type_var="CELLTYPE_${dataset}"
+cell_type="${!cell_type_var}"

 # Run metrics
 python src/metrics/all_metrics/script.py \
@@ -67,6 +73,9 @@ python src/metrics/all_metrics/script.py \
     --evaluation_data_sc "resources/grn_benchmark/evaluation_data/${dataset}_sc.h5ad" \
     --evaluation_data_de "resources/grn_benchmark/evaluation_data/${dataset}_de.h5ad" \
     --regulators_consensus "resources/grn_benchmark/prior/regulators_consensus_${dataset}.json" \
+    --ground_truth_unibind "resources/grn_benchmark/ground_truth/${cell_type}_unibind.csv" \
+    --ground_truth_chipatlas "resources/grn_benchmark/ground_truth/${cell_type}_chipatlas.csv" \
+    --ground_truth_remap "resources/grn_benchmark/ground_truth/${cell_type}_remap.csv" \
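Note: the new ground_truth arguments pick the cell type for the chosen dataset through bash indirect expansion. cell_type_var holds the name of a variable defined in src/utils/dataset_config.env (for example CELLTYPE_op), and "${!cell_type_var}" expands to that variable's value. A standalone sketch of the pattern follows; the CELLTYPE_op value is a made-up placeholder, not the real entry from dataset_config.env.

    #!/usr/bin/env bash
    # Stand-in for what "source src/utils/dataset_config.env" would normally provide;
    # the value below is a placeholder for illustration only.
    CELLTYPE_op="example_cell_type"

    dataset="op"
    cell_type_var="CELLTYPE_${dataset}"   # builds the variable name: CELLTYPE_op
    cell_type="${!cell_type_var}"         # indirect expansion: reads the value of CELLTYPE_op
    echo "resources/grn_benchmark/ground_truth/${cell_type}_unibind.csv"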

src/metrics/regression/config.vsh.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ info:
   description: |
     Calculates regression scores 2
   metrics:
-    - name: r2-theta-0.0
+    - name: r2-theta-0.1
      label: R2 (precision)
      summary: Captures the perfomance for the top regulatory links
      description: |

src/metrics/regression/consensus/helper.py

Lines changed: 0 additions & 3 deletions
@@ -10,13 +10,10 @@
 from util import process_links
 def main(par):
     print(par)
-    # Load perturbation data
     adata_rna = anndata.read_h5ad(par['evaluation_data'])
     gene_names = adata_rna.var_names

     gene_dict = {gene_name: i for i, gene_name in enumerate(gene_names)}
-
-    # Load inferred GRNs
     grns = []
     for filepath in par['predictions']:
         net = ad.read_h5ad(filepath)
