@@ -47,8 +47,26 @@ workflow PROTEIN_DESIGN {
4747
4848 CONVERT_CIF_TO_PDB (ch_structures_for_conversion)
4949
50- // Step 2: Run ProteinMPNN on converted PDB structures
51- PROTEINMPNN_OPTIMIZE (CONVERT_CIF_TO_PDB . out. pdb_files)
50+ // Step 2: Parallelize ProteinMPNN - run separately for each budget design
51+ // Use flatMap to create individual tasks per PDB file (one per budget iteration)
52+ ch_pdb_per_design = CONVERT_CIF_TO_PDB . out. pdb_files_all
53+ .flatMap { meta, pdb_files ->
54+ // Convert to list if single file
55+ def pdb_list = pdb_files instanceof List ? pdb_files : [pdb_files]
56+
57+ // Create a separate channel entry for each PDB file
58+ pdb_list. collect { pdb_file ->
59+ def design_meta = [:]
60+ design_meta. id = " ${ meta.id} _${ pdb_file.baseName} "
61+ design_meta. parent_id = meta. id
62+ design_meta. design_name = pdb_file. baseName
63+
64+ [design_meta, pdb_file]
65+ }
66+ }
67+
68+ // Run ProteinMPNN on each design individually (parallel execution per budget design)
69+ PROTEINMPNN_OPTIMIZE (ch_pdb_per_design)
5270
5371 // Use ProteinMPNN optimized structures for downstream analyses
5472 ch_final_designs_for_analysis = PROTEINMPNN_OPTIMIZE . out. optimized_designs
@@ -61,9 +79,24 @@ workflow PROTEIN_DESIGN {
6179 ch_boltzgen_structures = BOLTZGEN_RUN . out. final_cifs
6280 EXTRACT_TARGET_SEQUENCES (ch_boltzgen_structures)
6381
64- // Combine ProteinMPNN FASTA outputs with target sequence
65- // Join based on parent_id (meta.parent_id from MPNN matches meta.id from Boltzgen)
66- ch_protenix_input = PROTEINMPNN_OPTIMIZE . out. sequences
82+ // Parallelize Protenix per FASTA file (one per ProteinMPNN sequence)
83+ // Each ProteinMPNN run generates multiple FASTA files (mpnn_num_seq_per_target)
84+ ch_protenix_per_sequence = PROTEINMPNN_OPTIMIZE . out. sequences
85+ .flatMap { meta, fasta_files ->
86+ // Convert to list if single file
87+ def fasta_list = fasta_files instanceof List ? fasta_files : [fasta_files]
88+
89+ // Create a separate entry for each FASTA file
90+ fasta_list. collect { fasta_file ->
91+ def seq_meta = [:]
92+ seq_meta. id = " ${ meta.id} _${ fasta_file.baseName} "
93+ seq_meta. parent_id = meta. parent_id
94+ seq_meta. mpnn_parent_id = meta. id
95+ seq_meta. sequence_name = fasta_file. baseName
96+
97+ [seq_meta, fasta_file]
98+ }
99+ }
67100 .map { meta, fasta ->
68101 [meta. parent_id, meta, fasta]
69102 }
@@ -76,8 +109,8 @@ workflow PROTEIN_DESIGN {
76109 [meta, fasta, target_seq]
77110 }
78111
79- // Run Protenix structure prediction on combined sequences
80- PROTENIX_REFOLD (ch_protenix_input )
112+ // Run Protenix structure prediction on each sequence individually
113+ PROTENIX_REFOLD (ch_protenix_per_sequence )
81114
82115 // ================================================================
83116 // Step 4: Convert Protenix confidence JSON to NPZ for ipSAE
0 commit comments