Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions conf/test_noise_eval.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ params {
save_data = false
}

env {
HF_DATASETS_CACHE = '/tmp/hf_cache'
}

// Limit resources so that this can run on GitHub Actions
process {
maxRetries = params.max_retries
Expand Down
1 change: 0 additions & 1 deletion modules/local/stimulus/check_model/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ process CHECK_MODEL {
def args = task.ext.args ?: ''
"""
stimulus check-model \
-e ${data_config} \
-d ${data} \
-m ${model} \
-c ${model_config} \
Expand Down
4 changes: 2 additions & 2 deletions modules/local/stimulus/compare_tensors/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process STIMULUS_COMPARE_TENSORS {
tag "${meta.id}"
tag "${meta.id1}"
label 'process_medium'
container "docker.io/mathysgrapotte/stimulus-py:dev"

Expand All @@ -18,7 +18,7 @@ process STIMULUS_COMPARE_TENSORS {
"""
stimulus compare-tensors \
${tensors} \
-s scores.csv \
-o scores.csv \
${args}

# Extract first row of scores.csv
Expand Down
41 changes: 41 additions & 0 deletions modules/local/stimulus/encode/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
process ENCODE_CSV {

    // Encodes a (transformed) CSV into stimulus's on-disk encoded format.
    // Emits the encoded output keyed by the config's meta (meta2) so downstream
    // steps can join on transform identity.
    tag "${meta.id}"
    label 'process_medium'
    // TODO: push image to nf-core quay.io
    container "docker.io/mathysgrapotte/stimulus-py:dev"

    input:
    tuple val(meta), path(data)     // meta: carries split_id; data: CSV to encode
    tuple val(meta2), path(config)  // meta2: carries transform_id; config: stimulus YAML config

    output:
    tuple val(meta2), path("${prefix}_encoded"), emit: encoded
    path "versions.yml"                        , emit: versions

    script:
    def args = task.ext.args ?: ''
    // `prefix` is process-scoped (no `def`) so the output declaration can see it.
    prefix = task.ext.prefix ?: "${meta.split_id}-${meta2.transform_id}"
    """
    stimulus encode-csv \
        -d ${data} \
        -y ${config} \
        -o ${prefix}_encoded \
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        stimulus: \$(stimulus -v | cut -d ' ' -f 3)
    END_VERSIONS
    """

    stub:
    // FIX: the stub previously echoed a message copied from CHECK_MODEL, never
    // defined `prefix`, and never created the declared output `${prefix}_encoded`,
    // so `-stub` runs failed during output collection.
    prefix = task.ext.prefix ?: "${meta.split_id}-${meta2.transform_id}"
    """
    mkdir -p ${prefix}_encoded

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        stimulus: \$(stimulus -v | cut -d ' ' -f 3)
    END_VERSIONS
    """
}
3 changes: 1 addition & 2 deletions modules/local/stimulus/predict/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ process STIMULUS_PREDICT {

input:
tuple val(meta) , path(model), path(model_config), path(weigths)
tuple val(meta2), path(data), path(data_config)
tuple val(meta2), path(data), path(config)

output:
tuple val(meta), path("${prefix}-pred.safetensors"), emit: predictions
Expand All @@ -17,7 +17,6 @@ process STIMULUS_PREDICT {
"""
stimulus predict \
-d ${data} \
-e ${data_config} \
-m ${model} \
-c ${model_config} \
-w ${weigths} \
Expand Down
6 changes: 3 additions & 3 deletions modules/local/stimulus/split_csv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ process STIMULUS_SPLIT_DATA {
tuple val(meta2), path(sub_config)

output:
tuple val(meta2), path("${prefix}.csv"), emit: csv_with_split
tuple val(meta2), path("${prefix}_split"), emit: csv_with_split
path "versions.yml" , emit: versions

script:
Expand All @@ -19,7 +19,7 @@ process STIMULUS_SPLIT_DATA {
stimulus split-csv \
-c ${data} \
-y ${sub_config} \
-o ${prefix}.csv
-o ${prefix}_split

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand All @@ -30,7 +30,7 @@ process STIMULUS_SPLIT_DATA {
stub:
prefix = task.ext.prefix ?: "${meta.id}-split-${meta2.id}"
"""
touch ${prefix}.csv
touch ${prefix}_split

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
4 changes: 2 additions & 2 deletions modules/local/stimulus/transform_csv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ process STIMULUS_TRANSFORM_CSV {
tuple val(meta2), path(config)

output:
tuple val(meta), path("${prefix}.csv"), emit: transformed_data
tuple val(meta), path("${prefix}"), emit: transformed_data
path "versions.yml" , emit: versions

script:
Expand All @@ -20,7 +20,7 @@ process STIMULUS_TRANSFORM_CSV {
stimulus transform-csv \
-c ${data} \
-y ${config} \
-o ${prefix}.csv
-o ${prefix}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
1 change: 0 additions & 1 deletion modules/local/stimulus/tune/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ process STIMULUS_TUNE {
stimulus tune \
-d ${transformed_data} \
-m ${model} \
-e ${data_sub_config} \
-c ${model_config} \
-o ${prefix}-best-model.safetensors \
-bo ${prefix}-best-optimizer.opt \
Expand Down
53 changes: 34 additions & 19 deletions subworkflows/local/evaluation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@ workflow EVALUATION_WF {
// Evaluation mode 1: Predict the data using the best model
// and then compare the predictions of 2 different models
//

STIMULUS_PREDICT(
model,
ch_data.collect()
ch_data.first() // converts a queue channel to a value channel
)
ch_versions = ch_versions.mix(STIMULUS_PREDICT.out.versions)
predictions = STIMULUS_PREDICT.out.predictions
Expand All @@ -41,26 +40,42 @@ workflow EVALUATION_WF {
// and the same number of trials, we can estimate the noise across replicates
// This is done by comparing the predictions of the alternative models between each other
// and then calculating a summary metric over them (e.g. mean, median, std, etc.)

replicate_predictions = predictions.map{
meta, prediction ->
[["id": meta.id,
"split_id": meta.split_id,
"transform_id": meta.transform_id,
"n_trials": meta.n_trials ], meta, prediction]
}.groupTuple(by:0)
.map{
merging_meta, metas, predictions ->
[merging_meta, predictions]
pairs = predictions
.collate(2)
.collect()
.map { items ->
def pairs = []
// Create all unique combinations using index comparison
(0..<items.size()).each { i ->
(i+1..<items.size()).each { j ->
def meta1 = items[i][0]
def meta2 = items[j][0]
def files = [items[i][1], items[j][1]]
// Only compare different transforms OR different replicates
if(meta1.transform_id != meta2.transform_id || meta1.replicate != meta2.replicate) {
pairs << [
[
"id1": meta1.id,
"id2": meta2.id,
"split_id1": meta1.split_id,
"split_id2": meta2.split_id,
"transform_id1": meta1.transform_id,
"transform_id2": meta2.transform_id,
"replicate1": meta1.replicate,
"replicate2": meta2.replicate
],
// Create unique filenames using both transforms and replicates
files
]
}
}
}

// check if the predictions are at least 2, meta,predictions
replicate_predictions.filter{
it[1].size() > 1
}.set{ replicate_predictions }
pairs
}
.flatMap { it }

STIMULUS_COMPARE_TENSORS_COSINE(
replicate_predictions
pairs
)

cosine_scores = STIMULUS_COMPARE_TENSORS_COSINE.out.csv
Expand Down
13 changes: 10 additions & 3 deletions subworkflows/local/transform_csv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

include { STIMULUS_TRANSFORM_CSV } from '../../../modules/local/stimulus/transform_csv'
include { ENCODE_CSV } from '../../../modules/local/stimulus/encode'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -48,17 +49,23 @@ workflow TRANSFORM_CSV_WF {
config:
[meta, config]
}

// run stimulus transform
STIMULUS_TRANSFORM_CSV(
ch_input.data,
ch_input.config
)
ch_transformed_data = STIMULUS_TRANSFORM_CSV.out.transformed_data
ch_versions = ch_versions.mix(STIMULUS_TRANSFORM_CSV.out.versions)

// run stimulus encode
ENCODE_CSV(
ch_transformed_data,
ch_input.config
)
ch_encoded_data = ENCODE_CSV.out.encoded
ch_versions = ch_versions.mix(ENCODE_CSV.out.versions)

emit:
transformed_data = ch_transformed_data
transformed_data = ch_encoded_data
versions = ch_versions // channel: [ versions.yml ]
}

Expand Down
6 changes: 6 additions & 0 deletions workflows/deepmodeloptim.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ include { SPLIT_CSV_WF } from '../subworkflows/local/spli
include { TRANSFORM_CSV_WF } from '../subworkflows/local/transform_csv'
include { TUNE_WF } from '../subworkflows/local/tune'
include { EVALUATION_WF } from '../subworkflows/local/evaluation'
include { ENCODE_CSV } from '../modules/local/stimulus/encode'

//
// MODULES: Consisting of nf-core/modules
Expand Down Expand Up @@ -149,6 +150,11 @@ workflow DEEPMODELOPTIM {
// Now the data config will not work if passed in full
// We need to pass in the split data config, any of them, for the predict modules
// This will be changed in the future
ENCODE_CSV(
prediction_data,
TUNE_WF.out.data_config_tmp.first()
)
prediction_data = ENCODE_CSV.out.encoded
prediction_data = prediction_data.combine(TUNE_WF.out.data_config_tmp.first().map{meta,file -> file})
EVALUATION_WF(
TUNE_WF.out.model_tmp,
Expand Down
Loading