Skip to content

Commit 3fba9bf

Browse files
committed
add concat csv for cosine similarity
1 parent 706541d commit 3fba9bf

File tree

8 files changed

+56
-52
lines changed

8 files changed

+56
-52
lines changed

conf/modules.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ process {
145145
}
146146

147147
withName: "STIMULUS_COMPARE_TENSORS_COSINE" {
148-
ext.args = { "--mode cosine" }
148+
ext.args = { "--mode cosine_similarity" }
149149
}
150150

151151
withName: "CONCAT_COSINE" {
@@ -154,7 +154,7 @@ process {
154154
path: { "${params.outdir}/evaluation_results/" },
155155
mode: params.publish_dir_mode,
156156
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
157-
]
157+
]
158158
}
159159
}
160160

conf/test_noise_eval.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ params {
2121
model_config = params.pipelines_testdata_base_path + 'deepmodeloptim/testdata/titanic/titanic_model.yaml'
2222

2323
// tune parameters
24-
tune_trials_range = "2,4,2"
24+
tune_trials_range = "2,2,2"
2525
tune_replicates = 2
2626

2727
// predict data

modules/local/stimulus/check_model/main.nf

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ process CHECK_MODEL {
44
label 'process_medium'
55
// TODO: push image to nf-core quay.io
66
container "docker.io/mathysgrapotte/stimulus-py:dev"
7-
containerOptions '--shm-size=2gb'
87

98
input:
109
tuple val(meta), path(data_config)
@@ -15,7 +14,6 @@ process CHECK_MODEL {
1514

1615
output:
1716
stdout emit: standardout
18-
path "versions.yml" , emit: versions
1917

2018
script:
2119
def args = task.ext.args ?: ''

modules/local/stimulus/compare_tensors/main.nf

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ process STIMULUS_COMPARE_TENSORS {
77
tuple val(meta), path(tensors)
88

99
output:
10-
tuple val(meta), path("${prefix}.csv"), emit: csv
10+
tuple val(meta), path("${prefix}_scores.csv"), emit: csv
1111
path "versions.yml" , emit: versions
1212

1313
script:
@@ -17,18 +17,19 @@ process STIMULUS_COMPARE_TENSORS {
1717
def values = meta.values().join(",")
1818
"""
1919
stimulus compare-tensors \
20-
-t ${tensors} \
21-
${args} >> scores.txt
20+
${tensors} \
21+
-s scores.csv \
22+
${args}
2223
23-
# Extract first row of scores.txt
24-
header_scores=\$(head -n 1 scores.txt)
24+
# Extract first row of scores.csv
25+
header_scores=\$(head -n 1 scores.csv)
2526
2627
# Add metadata info to output file
27-
echo "${header},\$header_scores" > "${prefix}.scores"
28+
echo "${header},\$header_scores" > "${prefix}_scores.csv"
2829
2930
# Add values
30-
scores=\$(awk '{sub(/[[:space:]]+\$/, "")} 1' scores.txt | tr -s '[:blank:]' ',')
31-
echo "${values},\$scores" >> "${prefix}.scores"
31+
scores=\$(awk 'NR==2 {sub(/[[:space:]]+\$/, "")} NR==2' scores.csv | tr -s '[:blank:]' ',')
32+
echo "${values},\$scores" >> "${prefix}_scores.csv"
3233
3334
cat <<-END_VERSIONS > versions.yml
3435
"${task.process}":
@@ -40,7 +41,7 @@ process STIMULUS_COMPARE_TENSORS {
4041
prefix = task.ext.prefix ?: meta.id
4142
"""
4243
touch ${prefix}.csv
43-
44+
4445
cat <<-END_VERSIONS > versions.yml
4546
"${task.process}":
4647
stimulus: \$(stimulus -v | cut -d ' ' -f 3)

modules/local/stimulus/predict/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ process STIMULUS_PREDICT {
3434
prefix = task.ext.prefix ?: meta.id
3535
"""
3636
touch ${prefix}-pred.safetensors
37-
37+
3838
cat <<-END_VERSIONS > versions.yml
3939
"${task.process}":
4040
stimulus: \$(stimulus -v | cut -d ' ' -f 3)

subworkflows/local/evaluation/main.nf

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ workflow EVALUATION_WF {
2424

2525
ch_versions = Channel.empty()
2626

27-
//
28-
// Evaluation mode 1: Predict the data using the best model
27+
//
28+
// Evaluation mode 1: Predict the data using the best model
2929
// and then compare the predictions of 2 different models
3030
//
3131

@@ -36,44 +36,49 @@ workflow EVALUATION_WF {
3636
ch_versions = ch_versions.mix(STIMULUS_PREDICT.out.versions)
3737
predictions = STIMULUS_PREDICT.out.predictions
3838

39-
// Now we can estimate the noise across replicates
39+
// Now we can estimate the noise across replicates
4040
// This means: given a fixed initial model, initial data, and initial weights
4141
// and the same number of trials, we can estimate the noise across replicates
4242
// This is done by comparing the predictions of the alternative models between each other
4343
// and then calculating a summary metric over them (e.g. mean, median, std, etc.)
4444

4545
replicate_predictions = predictions.map{
46-
meta, prediction ->
46+
meta, prediction ->
4747
[["id": meta.id,
4848
"split_id": meta.split_id,
4949
"transform_id": meta.transform_id,
5050
"n_trials": meta.n_trials ], meta, prediction]
5151
}.groupTuple(by:0)
5252
.map{
53-
merging_meta, metas, predictions ->
53+
merging_meta, metas, predictions ->
5454
[merging_meta, predictions]
5555
}
5656

57-
//STIMULUS_COMPARE_TENSORS_COSINE(
58-
// replicate_predictions
59-
//)
60-
61-
//cosine_scores = STIMULUS_COMPARE_TENSORS_COSINE.out.scores
62-
63-
//cosine_scores
64-
//.map {
65-
// meta, csv -> csv
66-
//}
67-
//.collect()
68-
//.map {
69-
// csv ->
70-
// [ [ id:"summary_cosine" ], csv ]
71-
//}
72-
//.set { ch_cosine_summary }
73-
//CONCAT_COSINE (ch_cosine_summary, "csv", "csv")
57+
// check if the predictions are at least 2, meta,predictions
58+
replicate_predictions.filter{
59+
it[1].size() > 1
60+
}.set{ replicate_predictions }
7461

62+
STIMULUS_COMPARE_TENSORS_COSINE(
63+
replicate_predictions
64+
)
7565

76-
emit:
66+
cosine_scores = STIMULUS_COMPARE_TENSORS_COSINE.out.csv
67+
68+
cosine_scores
69+
.map {
70+
meta, csv -> csv
71+
}
72+
.collect()
73+
.map {
74+
csv ->
75+
[ [ id:"summary_cosine" ], csv ]
76+
}
77+
.set { ch_cosine_summary }
78+
CONCAT_COSINE (ch_cosine_summary, "csv", "csv")
79+
80+
81+
emit:
7782
versions = ch_versions // channel: [ versions.yml ]
7883

79-
}
84+
}

subworkflows/local/utils_nfcore_deepmodeloptim_pipeline/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ workflow PIPELINE_INITIALISATION {
119119

120120
//
121121
// Create the channels for the prediction data
122-
//
122+
//
123123
ch_prediction_data = params.prediction_data == null ?
124124
Channel.empty() :
125125
Channel.fromPath(params.prediction_data, checkIfExists: true)

workflows/deepmodeloptim.nf

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -106,19 +106,19 @@ workflow DEEPMODELOPTIM {
106106
// Check model
107107
// ==============================================================================
108108

109-
CHECK_MODEL_WF (
110-
ch_transformed_data.first(),
111-
ch_yaml_sub_config.first(),
112-
ch_model,
113-
ch_model_config,
114-
ch_initial_weights
115-
)
109+
// CHECK_MODEL_WF (
110+
// ch_transformed_data.first(),
111+
// ch_yaml_sub_config.first(),
112+
// ch_model,
113+
// ch_model_config,
114+
// ch_initial_weights
115+
// )
116116

117117
// ==============================================================================
118118
// Tune model
119119
// ==============================================================================
120120
// Create WF dependency to ensure TUNE_WF runs after CHECK_MODEL_WF finished
121-
ch_transformed_data = CHECK_MODEL_WF.out.concat(ch_transformed_data)
121+
//ch_transformed_data = CHECK_MODEL_WF.out.concat(ch_transformed_data)
122122

123123
TUNE_WF(
124124
ch_transformed_data,
@@ -131,13 +131,13 @@ workflow DEEPMODELOPTIM {
131131
)
132132

133133
// ==============================================================================
134-
// Evaluation
134+
// Evaluation
135135
// ==============================================================================
136-
136+
137137
// Now the data config will not work if passed in full
138-
// We need to pass in the split data config, any of them, for the predict modules
138+
// We need to pass in the split data config, any of them, for the predict modules
139139
// This will be changed in the future
140-
prediction_data = prediction_data.combine(TUNE_WF.out.data_config.first().map{meta,file -> file})
140+
prediction_data = prediction_data.combine(TUNE_WF.out.data_config.first().map{meta,file -> file})
141141
EVALUATION_WF(
142142
TUNE_WF.out.best_model,
143143
prediction_data

0 commit comments

Comments
 (0)