Skip to content

Commit c60dded

Browse files
committed
wip: Further patches to file processing with precomputed boltzgen results
1 parent 15e9396 commit c60dded

File tree

5 files changed

+43
-16
lines changed

5 files changed

+43
-16
lines changed

assets/NO_MSA

Whitespace-only changes.

assets/NO_TEMPLATE

Whitespace-only changes.

modules/local/boltz2_refold.nf

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,23 @@ process BOLTZ2_REFOLD {
198198
echo "Affinity predictions: \${AFFINITY_COUNT}"
199199
echo "Output directory: ${meta.id}_boltz2_output"
200200
echo "============================================"
201+
202+
# Fail if no structures were produced (critical for downstream processes)
203+
if [ "\${CIF_COUNT}" -eq 0 ]; then
204+
echo ""
205+
echo "ERROR: No CIF structures were produced by Boltz-2!"
206+
echo "This will cause downstream processes (IPSAE, PRODIGY) to have no input."
207+
echo ""
208+
echo "Debug info - checking boltz2_results directory structure:"
209+
# List up to 50 files under boltz2_results for debugging. The directory is
# tested explicitly because the exit status of `find | head` is that of `head`
# (0 even when find fails), so a trailing `|| echo` would never fire.
# `|| true` keeps an early-closing `head` (SIGPIPE under pipefail) from aborting before `exit 1`.
if [ -d boltz2_results ]; then find boltz2_results -type f 2>/dev/null | head -50 || true; else echo "No boltz2_results directory found"; fi
210+
exit 1
211+
fi
212+
213+
# Warn if no PAE files (needed for IPSAE)
214+
if [ "\${NPZ_COUNT}" -eq 0 ]; then
215+
echo ""
216+
echo "WARNING: No PAE NPZ files were produced. IPSAE will not be able to run."
217+
fi
201218
202219
# Create summary file
203220
cat > ${meta.id}_boltz2_output/prediction_summary.txt <<SUMMARY

modules/local/consolidate_metrics.nf

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,32 +21,40 @@ process CONSOLIDATE_METRICS {
2121
script:
2222
def pae_cutoff = params.ipsae_pae_cutoff ?: 10
2323
def dist_cutoff = params.ipsae_dist_cutoff ?: 10
24-
def seq_dir_flag = sequence_files.name != 'NO_SEQUENCE_FILES' ? '--sequence_dir "sequences"' : ''
2524

2625
"""
2726
# Make script executable
2827
chmod +x ${consolidate_script}
2928
29+
# Create directories if they don't exist (handles empty input lists)
30+
mkdir -p ipsae prodigy foldseek sequences
31+
3032
# Debug: List staged files in subdirectories
3133
echo "=== Staged ipSAE files ==="
32-
ls -la ipsae/ 2>/dev/null || echo "No ipsae directory"
34+
# The directory always exists after the preceding mkdir -p, so test for contents
# rather than relying on ls failing; otherwise the fallback message is unreachable.
if [ -n "\$(ls -A ipsae/ 2>/dev/null)" ]; then ls -la ipsae/; else echo "No ipsae files"; fi
3335
echo ""
3436
echo "=== Staged Prodigy files ==="
35-
ls -la prodigy/ 2>/dev/null || echo "No prodigy directory"
37+
# The directory always exists after the preceding mkdir -p, so test for contents
# rather than relying on ls failing; otherwise the fallback message is unreachable.
if [ -n "\$(ls -A prodigy/ 2>/dev/null)" ]; then ls -la prodigy/; else echo "No prodigy files"; fi
3638
echo ""
3739
echo "=== Staged Foldseek files ==="
38-
ls -la foldseek/ 2>/dev/null || echo "No foldseek directory"
40+
# The directory always exists after the preceding mkdir -p, so test for contents
# rather than relying on ls failing; otherwise the fallback message is unreachable.
if [ -n "\$(ls -A foldseek/ 2>/dev/null)" ]; then ls -la foldseek/; else echo "No foldseek files"; fi
3941
echo ""
4042
echo "=== Staged Sequence files ==="
41-
ls -la sequences/ 2>/dev/null || echo "No sequences directory"
43+
# The directory always exists after the preceding mkdir -p, so test for contents
# rather than relying on ls failing; otherwise the fallback message is unreachable.
if [ -n "\$(ls -A sequences/ 2>/dev/null)" ]; then ls -la sequences/; else echo "No sequence files"; fi
4244
echo ""
4345
46+
# Build command with optional sequence directory (only if files exist)
47+
SEQ_FLAG=""
48+
if [ -n "\$(ls -A sequences/ 2>/dev/null)" ]; then
49+
SEQ_FLAG="--sequence_dir sequences"
50+
fi
51+
4452
# Run consolidation script with staged subdirectories
4553
python ${consolidate_script} \\
4654
--ipsae_dir "ipsae" \\
4755
--prodigy_dir "prodigy" \\
4856
--foldseek_dir "foldseek" \\
49-
${seq_dir_flag} \\
57+
\${SEQ_FLAG} \\
5058
--output_html design_metrics_report.html \\
5159
--output_csv design_metrics_summary.csv \\
5260
--title "Protein Design Metrics Report" \\

workflows/protein_design.nf

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,10 @@ workflow PROTEIN_DESIGN {
141141
// ================================================================
142142
// Prepare Target MSA from Samplesheet
143143
// ================================================================
144+
// Use actual placeholder files in assets/ for k8s compatibility (avoids staging non-existent files)
144145
ch_target_msa = ch_input
145146
.map { meta, design_yaml, structure_files, target_msa, target_sequence, target_template, boltzgen_output_dir ->
146-
def msa_file = target_msa ?: file('NO_MSA')
147+
def msa_file = target_msa ?: file("${projectDir}/assets/NO_MSA", checkIfExists: true)
147148
[meta.id, msa_file]
148149
}
149150

@@ -152,7 +153,7 @@ workflow PROTEIN_DESIGN {
152153
// ================================================================
153154
ch_target_template = ch_input
154155
.map { meta, design_yaml, structure_files, target_msa, target_sequence, target_template, boltzgen_output_dir ->
155-
def template_file = target_template ?: file('NO_TEMPLATE')
156+
def template_file = target_template ?: file("${projectDir}/assets/NO_TEMPLATE", checkIfExists: true)
156157
[meta.id, template_file]
157158
}
158159

@@ -398,30 +399,31 @@ workflow PROTEIN_DESIGN {
398399

399400
// Collect output files from each analysis process
400401
// These will be staged into the consolidation task's work directory
402+
// Use empty lists [] instead of non-existent placeholder files for k8s compatibility
401403

402404
// ipSAE scores (the .txt files, not byres)
403405
ch_ipsae_files = (params.run_ipsae && params.run_proteinmpnn && params.run_boltz2_refold)
404406
? IPSAE_CALCULATE.out.scores
405407
.map { meta, file -> file }
406408
.collect()
407-
.ifEmpty { file('NO_IPSAE_FILES') }
408-
: Channel.value(file('NO_IPSAE_FILES'))
409+
.ifEmpty { [] }
410+
: Channel.value([])
409411

410412
// Prodigy results (.txt files)
411413
ch_prodigy_files = (params.run_prodigy && params.run_proteinmpnn && params.run_boltz2_refold)
412414
? PRODIGY_PREDICT.out.results
413415
.map { meta, file -> file }
414416
.collect()
415-
.ifEmpty { file('NO_PRODIGY_FILES') }
416-
: Channel.value(file('NO_PRODIGY_FILES'))
417+
.ifEmpty { [] }
418+
: Channel.value([])
417419

418420
// Foldseek summaries (.tsv files)
419421
ch_foldseek_files = (params.run_foldseek && params.run_proteinmpnn && params.run_boltz2_refold)
420422
? FOLDSEEK_SEARCH.out.summary
421423
.map { meta, file -> file }
422424
.collect()
423-
.ifEmpty { file('NO_FOLDSEEK_FILES') }
424-
: Channel.value(file('NO_FOLDSEEK_FILES'))
425+
.ifEmpty { [] }
426+
: Channel.value([])
425427

426428
// ====================================================================
427429
// Collect binder sequences from ProteinMPNN for the report
@@ -435,9 +437,9 @@ workflow PROTEIN_DESIGN {
435437
fasta_list.collect { fasta_file -> fasta_file }
436438
}
437439
.collect()
438-
.ifEmpty { file('NO_SEQUENCE_FILES') }
440+
.ifEmpty { [] }
439441
} else {
440-
ch_sequence_files = Channel.value(file('NO_SEQUENCE_FILES'))
442+
ch_sequence_files = Channel.value([])
441443
}
442444

443445
// Run consolidation with staged files

0 commit comments

Comments
 (0)