
Commit d57df34

Merge pull request #101 from openproblems-bio/jalil
stable metrics are selected
2 parents ad0264f + 23168dc

File tree

23 files changed: +468, -212 lines changed
Two binary files changed (72.3 KB and 236 KB); contents not shown.

scripts/process_data/rest.sh

Lines changed: 14 additions & 14 deletions
@@ -4,28 +4,28 @@
 #SBATCH --error=logs/%j.err
 #SBATCH --ntasks=1
 #SBATCH --cpus-per-task=10
-#SBATCH --time=10:00:00
-#SBATCH --mem=1000GB
+#SBATCH --time=20:00:00
+#SBATCH --mem=500GB
 #SBATCH --partition=cpu
 #SBATCH --mail-type=END,FAIL
 #SBATCH --mail-user=jalil.nourisa@gmail.com


 set -e

-python src/process_data/adamson/script.py
-python src/process_data/nakatake/script.py
-python src/process_data/norman/script.py
+# python src/process_data/main/adamson/script.py
+# python src/process_data/main/nakatake/script.py
+# python src/process_data/main/norman/script.py

-echo "Processing opsca"
-python src/process_data/opsca/script.py
+# echo "Processing opsca"
+# python src/process_data/main/opsca/script.py
 echo "Processing replogle"
-python src/process_data/replogle/script.py #--run_test #--run_test
-echo "Processing xaira"
-python src/process_data/xaira/script.py #--run_test
+# python src/process_data/main/replogle/script.py #--run_test #--run_test
+# echo "Processing xaira"
+python src/process_data/main/xaira/script.py #--run_test


-echo "Processing 300BCG"
-python src/process_data/300BCG/script.py
-echo "Processing IBD"
-python src/process_data/ibd/script.py
+# echo "Processing 300BCG"
+# python src/process_data/main/300BCG/script.py
+# echo "Processing IBD"
+# python src/process_data/main/ibd/script.py

scripts/run_all.sh

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 set -e

-datasets=('replogle' 'op') #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd' '300BCG') #
+datasets=( 'xaira_HEK293T' 'xaira_HCT116' ) #'replogle' 'op' 'nakatake' 'adamson' 'norman' 'xaira_HEK293T' 'xaira_HCT116' 'parsebioscience' 'ibd' '300BCG') #

 run_local=false # set to true to run locally, false to run on AWS

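Note: the run_all.sh change narrows the active datasets to the two xaira entries; the trailing comment keeps the full list for re-enabling later. A minimal sketch of how an array like this typically drives per-dataset runs follows; the loop body is illustrative only and is not part of this diff.

    #!/usr/bin/env bash
    set -e

    # Active datasets; names from the commented list in run_all.sh can be added back here.
    datasets=( 'xaira_HEK293T' 'xaira_HCT116' )

    for dataset in "${datasets[@]}"; do
        echo "Running pipeline for ${dataset}"
        # placeholder for the actual per-dataset command in run_all.sh (not shown in this diff)
    done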
scripts/run_grn_evaluation.sh

Lines changed: 1 addition & 0 deletions
@@ -106,6 +106,7 @@ echo "Generating dataset configuration..."
 python src/utils/config.py --output src/utils/dataset_config.env
 source src/utils/dataset_config.env

+
 if [ "$RUN_LOCAL" = true ]; then
     cat >> "$param_local" << HERE
 param_list:

src/methods/pearson_corr/script.py

Lines changed: 3 additions & 1 deletion
@@ -10,7 +10,7 @@

 ## VIASH START
 par = {
-    'rna': 'resources/grn_benchmark/inference_data//op_rna.h5ad',
+    'rna': 'resources/grn_benchmark/inference_data//replogle_rna.h5ad',
     'tf_all': 'resources/grn_benchmark/prior/tf_all.csv',
     'cell_type_specific': False,
     'max_n_links': 50000,
@@ -40,6 +40,8 @@ def main(par):
     print('Output GRN')
     print('Shape of the network:', net.shape)
     print(net.sort_values('weight', ascending=False, key=abs).head(10))
+    print(net['source'].nunique(), 'TFs')
+    print('Num neg signs?:', (net['weight'] < 0).sum())
     net = net.astype(str)
     output = ad.AnnData(
         X=None,

src/metrics/all_metrics/helper.py

Lines changed: 5 additions & 3 deletions
@@ -74,10 +74,10 @@ def replica_consistency_metric(par, dataset_id):
             return output
     return None

-def reg2_metric(par, dataset_id):
+def reg_metric(par, dataset_id):
     if dataset_id in DATASETS_METRICS:
         if 'regression' in DATASETS_METRICS[dataset_id]:
-            output = main_reg(par)
+            _ , output = main_reg(par)
             return output
     return None

@@ -92,7 +92,9 @@
 def main(par):
     dataset_id = ad.read_h5ad(par['evaluation_data'], backed='r').uns['dataset_id']
     rr_store = []
-    metrics = [reg2_metric, ws_distance_metric, sem_metric, tf_rec_metric, replica_consistency_metric]
+    metrics = [reg_metric, ws_distance_metric, sem_metric, tf_rec_metric, replica_consistency_metric, tf_binding_metric]
+    # metrics = [tf_binding_metric, sem_metric, replica_consistency_metric]
+
     for metric in metrics:
         print(f"Computing metric: {metric.__name__}")
         rr = metric(par, dataset_id)

src/metrics/all_metrics/run_local.sh

Lines changed: 14 additions & 5 deletions
@@ -16,6 +16,9 @@ set -euo pipefail
 layer="lognorm"
 reg_type="ridge"
 num_workers=20
+prediction="output/net.h5ad"
+score="output/score.h5ad"
+dataset="op"

 while [[ $# -gt 0 ]]; do
     case "$1" in
@@ -53,12 +53,15 @@ if [[ -n "$layer" ]]; then
     echo "Layer is set to: $layer"
 fi

-# Check required args
-if [[ -z "${dataset:-}" || -z "${prediction:-}" || -z "${score:-}" ]]; then
-    echo "Usage: $0 --dataset <name> --prediction <file> --score <metric>"
-    exit 1
-fi
+# # Check required args
+# if [[ -z "${dataset:-}" || -z "${prediction:-}" || -z "${score:-}" ]]; then
+#     echo "Usage: $0 --dataset <name> --prediction <file> --score <metric>"
+#     exit 1
+# fi

+source src/utils/dataset_config.env
+cell_type_var="CELLTYPE_${dataset}"
+cell_type="${!cell_type_var}"

 # Run metrics
 python src/metrics/all_metrics/script.py \
@@ -67,6 +73,9 @@ python src/metrics/all_metrics/script.py \
     --evaluation_data_sc "resources/grn_benchmark/evaluation_data/${dataset}_sc.h5ad" \
     --evaluation_data_de "resources/grn_benchmark/evaluation_data/${dataset}_de.h5ad" \
     --regulators_consensus "resources/grn_benchmark/prior/regulators_consensus_${dataset}.json" \
+    --ground_truth_unibind "resources/grn_benchmark/ground_truth/${cell_type}_unibind.csv" \
+    --ground_truth_chipatlas "resources/grn_benchmark/ground_truth/${cell_type}_chipatlas.csv" \
+    --ground_truth_remap "resources/grn_benchmark/ground_truth/${cell_type}_remap.csv" \
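Note: the new ground_truth arguments pick the cell type for the chosen dataset through bash indirect expansion. cell_type_var holds the name of a variable defined in src/utils/dataset_config.env (for example CELLTYPE_op), and "${!cell_type_var}" expands to that variable's value. A standalone sketch of the pattern follows; the CELLTYPE_op value is a made-up placeholder, not the real entry from dataset_config.env.

    #!/usr/bin/env bash
    # Stand-in for what "source src/utils/dataset_config.env" would normally provide;
    # the value below is a placeholder for illustration only.
    CELLTYPE_op="example_cell_type"

    dataset="op"
    cell_type_var="CELLTYPE_${dataset}"   # builds the variable name: CELLTYPE_op
    cell_type="${!cell_type_var}"         # indirect expansion: reads the value of CELLTYPE_op
    echo "resources/grn_benchmark/ground_truth/${cell_type}_unibind.csv"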

src/metrics/regression/config.vsh.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ info:
   description: |
     Calculates regression scores 2
   metrics:
-    - name: r2-theta-0.0
+    - name: r2-theta-0.1
      label: R2 (precision)
      summary: Captures the perfomance for the top regulatory links
      description: |

src/metrics/regression/consensus/helper.py

Lines changed: 0 additions & 3 deletions
@@ -10,13 +10,10 @@
 from util import process_links
 def main(par):
     print(par)
-    # Load perturbation data
     adata_rna = anndata.read_h5ad(par['evaluation_data'])
     gene_names = adata_rna.var_names

     gene_dict = {gene_name: i for i, gene_name in enumerate(gene_names)}
-
-    # Load inferred GRNs
     grns = []
     for filepath in par['predictions']:
         net = ad.read_h5ad(filepath)
