Skip to content

Commit 3fba9bf

Browse files
committed
add concat csv for cosine similarity
1 parent 706541d commit 3fba9bf

File tree

8 files changed

+56
-52
lines changed

8 files changed

+56
-52
lines changed

conf/modules.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ process {
145145
}
146146

147147
withName: "STIMULUS_COMPARE_TENSORS_COSINE" {
148-
ext.args = { "--mode cosine" }
148+
ext.args = { "--mode cosine_similarity" }
149149
}
150150

151151
withName: "CONCAT_COSINE" {
@@ -154,7 +154,7 @@ process {
154154
path: { "${params.outdir}/evaluation_results/" },
155155
mode: params.publish_dir_mode,
156156
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
157-
]
157+
]
158158
}
159159
}
160160

conf/test_noise_eval.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ params {
2121
model_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/titanic/titanic_model.yaml'
2222

2323
// tune parameters
24-
tune_trials_range = "2,4,2"
24+
tune_trials_range = "2,2,2"
2525
tune_replicates = 2
2626

2727
// predict data

modules/local/stimulus/check_model/main.nf

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ process CHECK_MODEL {
44
label 'process_medium'
55
// TODO: push image to nf-core quay.io
66
container "docker.io/mathysgrapotte/stimulus-py:dev"
7-
containerOptions '--shm-size=2gb'
87

98
input:
109
tuple val(meta), path(data_config)
@@ -15,7 +14,6 @@ process CHECK_MODEL {
1514

1615
output:
1716
stdout emit: standardout
18-
path "versions.yml" , emit: versions
1917

2018
script:
2119
def args = task.ext.args ?: ''

modules/local/stimulus/compare_tensors/main.nf

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ process STIMULUS_COMPARE_TENSORS {
77
tuple val(meta), path(tensors)
88

99
output:
10-
tuple val(meta), path("${prefix}.csv"), emit: csv
10+
tuple val(meta), path("${prefix}_scores.csv"), emit: csv
1111
path "versions.yml" , emit: versions
1212

1313
script:
@@ -17,18 +17,19 @@ process STIMULUS_COMPARE_TENSORS {
1717
def values = meta.values().join(",")
1818
"""
1919
stimulus compare-tensors \
20-
-t ${tensors} \
21-
${args} >> scores.txt
20+
${tensors} \
21+
-s scores.csv \
22+
${args}
2223
23-
# Extract first row of scores.txt
24-
header_scores=\$(head -n 1 scores.txt)
24+
# Extract first row of scores.csv
25+
header_scores=\$(head -n 1 scores.csv)
2526
2627
# Add metadata info to output file
27-
echo "${header},\$header_scores" > "${prefix}.scores"
28+
echo "${header},\$header_scores" > "${prefix}_scores.csv"
2829
2930
# Add values
30-
scores=\$(awk '{sub(/[[:space:]]+\$/, "")} 1' scores.txt | tr -s '[:blank:]' ',')
31-
echo "${values},\$scores" >> "${prefix}.scores"
31+
scores=\$(awk 'NR==2 {sub(/[[:space:]]+\$/, "")} NR==2' scores.csv | tr -s '[:blank:]' ',')
32+
echo "${values},\$scores" >> "${prefix}_scores.csv"
3233
3334
cat <<-END_VERSIONS > versions.yml
3435
"${task.process}":
@@ -40,7 +41,7 @@ process STIMULUS_COMPARE_TENSORS {
4041
prefix = task.ext.prefix ?: meta.id
4142
"""
4243
touch ${prefix}.csv
43-
44+
4445
cat <<-END_VERSIONS > versions.yml
4546
"${task.process}":
4647
stimulus: \$(stimulus -v | cut -d ' ' -f 3)

modules/local/stimulus/predict/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ process STIMULUS_PREDICT {
3434
prefix = task.ext.prefix ?: meta.id
3535
"""
3636
touch ${prefix}-pred.safetensors
37-
37+
3838
cat <<-END_VERSIONS > versions.yml
3939
"${task.process}":
4040
stimulus: \$(stimulus -v | cut -d ' ' -f 3)

subworkflows/local/evaluation/main.nf

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ workflow EVALUATION_WF {
2424

2525
ch_versions = Channel.empty()
2626

27-
//
28-
// Evaluation mode 1: Predict the data using the best model
27+
//
28+
// Evaluation mode 1: Predict the data using the best model
2929
// and then compare the predictions of 2 different models
3030
//
3131

@@ -36,44 +36,49 @@ workflow EVALUATION_WF {
3636
ch_versions = ch_versions.mix(STIMULUS_PREDICT.out.versions)
3737
predictions = STIMULUS_PREDICT.out.predictions
3838

39-
// Now we can estimate the noise across replicates
39+
// Now we can estimate the noise across replicates
4040
// This means: given a fixed initial model, initial data, and initial weights
4141
// and the same number of trials, we can estimate the noise across replicates
4242
// This is done by comparing the predictions of the alternative models between each other
4343
// and then calculating a summary metric over them (e.g. mean, median, std, etc.)
4444

4545
replicate_predictions = predictions.map{
46-
meta, prediction ->
46+
meta, prediction ->
4747
[["id": meta.id,
4848
"split_id": meta.split_id,
4949
"transform_id": meta.transform_id,
5050
"n_trials": meta.n_trials ], meta, prediction]
5151
}.groupTuple(by:0)
5252
.map{
53-
merging_meta, metas, predictions ->
53+
merging_meta, metas, predictions ->
5454
[merging_meta, predictions]
5555
}
5656

57-
//STIMULUS_COMPARE_TENSORS_COSINE(
58-
// replicate_predictions
59-
//)
60-
61-
//cosine_scores = STIMULUS_COMPARE_TENSORS_COSINE.out.scores
62-
63-
//cosine_scores
64-
//.map {
65-
// meta, csv -> csv
66-
//}
67-
//.collect()
68-
//.map {
69-
// csv ->
70-
// [ [ id:"summary_cosine" ], csv ]
71-
//}
72-
//.set { ch_cosine_summary }
73-
//CONCAT_COSINE (ch_cosine_summary, "csv", "csv")
57+
// check if the predictions are at least 2, meta,predictions
58+
replicate_predictions.filter{
59+
it[1].size() > 1
60+
}.set{ replicate_predictions }
7461

62+
STIMULUS_COMPARE_TENSORS_COSINE(
63+
replicate_predictions
64+
)
7565

76-
emit:
66+
cosine_scores = STIMULUS_COMPARE_TENSORS_COSINE.out.csv
67+
68+
cosine_scores
69+
.map {
70+
meta, csv -> csv
71+
}
72+
.collect()
73+
.map {
74+
csv ->
75+
[ [ id:"summary_cosine" ], csv ]
76+
}
77+
.set { ch_cosine_summary }
78+
CONCAT_COSINE (ch_cosine_summary, "csv", "csv")
79+
80+
81+
emit:
7782
versions = ch_versions // channel: [ versions.yml ]
7883

79-
}
84+
}

subworkflows/local/utils_nfcore_deepmodeloptim_pipeline/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ workflow PIPELINE_INITIALISATION {
119119

120120
//
121121
// Create the channels for the prediction data
122-
//
122+
//
123123
ch_prediction_data = params.prediction_data == null ?
124124
Channel.empty() :
125125
Channel.fromPath(params.prediction_data, checkIfExists: true)

workflows/deepmodeloptim.nf

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -106,19 +106,19 @@ workflow DEEPMODELOPTIM {
106106
// Check model
107107
// ==============================================================================
108108

109-
CHECK_MODEL_WF (
110-
ch_transformed_data.first(),
111-
ch_yaml_sub_config.first(),
112-
ch_model,
113-
ch_model_config,
114-
ch_initial_weights
115-
)
109+
// CHECK_MODEL_WF (
110+
// ch_transformed_data.first(),
111+
// ch_yaml_sub_config.first(),
112+
// ch_model,
113+
// ch_model_config,
114+
// ch_initial_weights
115+
// )
116116

117117
// ==============================================================================
118118
// Tune model
119119
// ==============================================================================
120120
// Create WF dependency to ensure TUNE_WF runs after CHECK_MODEL_WF finished
121-
ch_transformed_data = CHECK_MODEL_WF.out.concat(ch_transformed_data)
121+
//ch_transformed_data = CHECK_MODEL_WF.out.concat(ch_transformed_data)
122122

123123
TUNE_WF(
124124
ch_transformed_data,
@@ -131,13 +131,13 @@ workflow DEEPMODELOPTIM {
131131
)
132132

133133
// ==============================================================================
134-
// Evaluation
134+
// Evaluation
135135
// ==============================================================================
136-
136+
137137
// Now the data config will not work if passed in full
138-
// We need to pass in the split data config, any of them, for the predict modules
138+
// We need to pass in the split data config, any of them, for the predict modules
139139
// This will be changed in the future
140-
prediction_data = prediction_data.combine(TUNE_WF.out.data_config.first().map{meta,file -> file})
140+
prediction_data = prediction_data.combine(TUNE_WF.out.data_config.first().map{meta,file -> file})
141141
EVALUATION_WF(
142142
TUNE_WF.out.best_model,
143143
prediction_data

0 commit comments

Comments
 (0)