Skip to content

Commit 9a55ed2

Browse files
committed
Fixed parameters and an error in Boltz-2 message printing.
1 parent 5736212 commit 9a55ed2

File tree

10 files changed

+81
-52
lines changed

10 files changed

+81
-52
lines changed

assets/schema_input_design.json

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,17 @@
5252
"target_sequence": {
5353
"type": "string",
5454
"errorMessage": "Target sequence must be a valid file path to a FASTA file containing the target protein sequence"
55+
},
56+
"target_template": {
57+
"type": "string",
58+
"pattern": "^\\S+\\.cif$",
59+
"errorMessage": "Target template must be a valid file path to a CIF file (e.g., 'target_structure.cif')"
5560
}
5661
},
57-
"required": ["sample_id", "design_yaml", "target_sequence"]
62+
"required": [
63+
"sample_id",
64+
"design_yaml",
65+
"target_sequence"
66+
]
5867
}
59-
}
68+
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
sample_id,design_yaml,structure_files,protocol,num_designs,budget,reuse,target_msa,target_sequence
2-
design1_nano,assets/test_data/nipah_nanobody_design.yaml,assets/test_data/nipah_virus_Glycoprotein_competition_structure.cif,nanobody-anything,3,2,,assets/test_data/nipah_glycoprotein_msa_Uniref30_2302.a3m,assets/test_data/nipah_virus_target_sequence_glycoproteinG.fasta
1+
sample_id,design_yaml,structure_files,protocol,num_designs,budget,reuse,target_msa,target_sequence,target_template
2+
design1_nano,assets/test_data/nipah_nanobody_design.yaml,assets/test_data/nipah_virus_Glycoprotein_competition_structure.cif,nanobody-anything,3,2,,assets/test_data/nipah_glycoprotein_msa_Uniref30_2302.a3m,assets/test_data/nipah_virus_target_sequence_glycoproteinG.fasta,assets/test_data/nipah_virus_Glycoprotein_competition_structure.cif
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
sample_id,design_yaml,structure_files,protocol,num_designs,budget,reuse,target_msa,target_sequence
2-
design1_pep,assets/test_data/nipah_peptide_design.yaml,assets/test_data/nipah_virus_Glycoprotein_competition_structure.cif,peptide-anything,3,2,,,assets/test_data/2VSM_target_sequence.fa
1+
sample_id,design_yaml,structure_files,protocol,num_designs,budget,reuse,target_msa,target_sequence,target_template
2+
design1_pep,assets/test_data/nipah_peptide_design.yaml,assets/test_data/nipah_virus_Glycoprotein_competition_structure.cif,peptide-anything,3,2,,,assets/test_data/2VSM_target_sequence.fa,
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
sample_id,design_yaml,structure_files,protocol,num_designs,budget,reuse,target_msa,target_sequence
2-
design1_prot,assets/test_data/nipah_protein_design.yaml,assets/test_data/nipah_virus_Glycoprotein_competition_structure.cif,protein-anything,3,2,,assets/test_data/nipah_glycoprotein_msa_Uniref30_2302.a3m,assets/test_data/nipah_virus_target_sequence_glycoproteinG.fasta
1+
sample_id,design_yaml,structure_files,protocol,num_designs,budget,reuse,target_msa,target_sequence,target_template
2+
design1_prot,assets/test_data/nipah_protein_design.yaml,assets/test_data/nipah_virus_Glycoprotein_competition_structure.cif,protein-anything,3,2,,assets/test_data/nipah_glycoprotein_msa_Uniref30_2302.a3m,assets/test_data/nipah_virus_target_sequence_glycoproteinG.fasta,

bin/prepare_boltz2_input.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ def parse_args():
1010
parser.add_argument('--mpnn_sequences', required=True, help='Path to ProteinMPNN sequences FASTA file')
1111
parser.add_argument('--target_sequence', required=True, help='Target sequence string')
1212
parser.add_argument('--target_msa', help='Path to target MSA file')
13+
parser.add_argument('--target_template', help='Path to target template CIF file')
1314
parser.add_argument('--meta_id', required=True, help='ID for the current design')
1415
parser.add_argument('--parent_id', required=True, help='Parent ID')
1516
parser.add_argument('--predict_affinity', action='store_true', help='Enable affinity prediction')
@@ -34,6 +35,13 @@ def main():
3435
has_target_msa = True
3536
target_msa_path = args.target_msa
3637

38+
# Use the target template only if a path was given, it is not the 'NO_TEMPLATE' placeholder, and the file exists
39+
has_target_template = False
40+
target_template_path = None
41+
if args.target_template and args.target_template != 'NO_TEMPLATE' and os.path.exists(args.target_template):
42+
has_target_template = True
43+
target_template_path = args.target_template
44+
3745
os.makedirs(args.output_dir, exist_ok=True)
3846

3947
# Process each FASTA file
@@ -104,6 +112,18 @@ def main():
104112
if has_target_msa and target_msa_path:
105113
target_entry['protein']['msa'] = os.path.abspath(target_msa_path)
106114
print(f" Adding target MSA: {target_msa_path}")
115+
116+
# Add template configuration if provided
117+
# Template is always applied to chain B (target) with force=true and threshold=1.0
118+
if has_target_template and target_template_path:
119+
target_entry['protein']['templates'] = [{
120+
'cif': os.path.abspath(target_template_path),
121+
'force': True,
122+
'threshold': 1.0,
123+
'chain_id': 'B'
124+
}]
125+
print(f" Adding target template: {target_template_path} (force=true, threshold=1.0)")
126+
107127
# Build final YAML input with exactly two entries
108128
boltz2_input = {
109129
'version': 1,
@@ -127,7 +147,7 @@ def main():
127147
yaml.dump(boltz2_input, yf, default_flow_style=False)
128148

129149
print(f" Created YAML input: {yaml_file}")
130-
print(f" Binder length: {len(binder_seq)}")
150+
print(f" Binder length: {len(binder_seq_clean)}")
131151
print(f" Target length: {len(target_seq)}")
132152
print(f" Target MSA: {'Yes' if has_target_msa else 'No (will use sequence only)'}")
133153
print(f" Binder MSA: Inferred automatically by Boltz-2")

main.nf

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ workflow NFPROTEINDESIGN {
106106
.fromList(design_samplesheet)
107107
.map { tuple ->
108108
// samplesheetToList returns list of values in schema order
109-
// Order: sample_id, design_yaml, structure_files, protocol, num_designs, budget, reuse, target_msa, target_sequence
109+
// Order: sample_id, design_yaml, structure_files, protocol, num_designs, budget, reuse, target_msa, target_sequence, target_template
110110
def sample_id = tuple[0]
111111
def design_yaml_path = tuple[1]
112112
def structure_files_str = tuple[2]
@@ -116,6 +116,7 @@ workflow NFPROTEINDESIGN {
116116
def reuse = tuple.size() > 6 ? tuple[6] : null
117117
def target_msa_path = tuple.size() > 7 ? tuple[7] : null
118118
def target_sequence_path = tuple.size() > 8 ? tuple[8] : null
119+
def target_template_path = tuple.size() > 9 ? tuple[9] : null
119120

120121
// Convert design YAML to file object and validate existence
121122
// Smart path resolution: try launchDir first (for local runs), then projectDir (for Platform)
@@ -182,14 +183,29 @@ workflow NFPROTEINDESIGN {
182183
}
183184
}
184185

186+
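// Resolve target template CIF file path (optional; used for Boltz-2 refolding): absolute/remote paths as-is, relative paths against launchDir first, then projectDir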
187+
def target_template = null
188+
if (target_template_path) {
189+
if (target_template_path.startsWith('/') || target_template_path.contains('://')) {
190+
target_template = file(target_template_path, checkIfExists: true)
191+
} else {
192+
def launchDir_path = file(target_template_path)
193+
if (launchDir_path.exists()) {
194+
target_template = launchDir_path
195+
} else {
196+
target_template = file("${project_dir}/${target_template_path}", checkIfExists: true)
197+
}
198+
}
199+
}
200+
185201
def meta = [:]
186202
meta.id = sample_id
187203
meta.protocol = protocol ?: params.protocol
188204
meta.num_designs = num_designs ?: params.num_designs
189205
meta.budget = budget ?: params.budget
190206
meta.reuse = reuse ?: false
191207

192-
[meta, design_yaml, structure_files, target_msa, target_sequence]
208+
[meta, design_yaml, structure_files, target_msa, target_sequence, target_template]
193209
}
194210

195211
// ========================================================================

modules/local/boltz2_refold.nf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ process BOLTZ2_REFOLD {
2727
accelerator 1, type: 'nvidia-gpu'
2828

2929
input:
30-
tuple val(meta), path(mpnn_sequences), path(target_sequence_file), path(target_msa)
30+
tuple val(meta), path(mpnn_sequences), path(target_sequence_file), path(target_msa), path(target_template)
3131
path cache_dir
3232

3333
output:
@@ -44,6 +44,7 @@ process BOLTZ2_REFOLD {
4444
def num_recycling = params.boltz2_num_recycling ?: 3
4545
def num_diffusion = params.boltz2_num_diffusion ?: 5
4646
def has_target_msa = target_msa.name != 'NO_MSA'
47+
def has_target_template = target_template.name != 'NO_TEMPLATE'
4748
"""
4849
#!/bin/bash
4950
set -euo pipefail
@@ -89,6 +90,7 @@ process BOLTZ2_REFOLD {
8990
--mpnn_sequences "${mpnn_sequences}" \\
9091
--target_sequence "\$TARGET_SEQ" \\
9192
--target_msa "${target_msa}" \\
93+
--target_template "${target_template}" \\
9294
--meta_id "${meta.id}" \\
9395
--parent_id "${meta.parent_id}" \\
9496
--output_dir "yaml_inputs" \\

nextflow.config

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,14 @@ params {
2020
// ========================================================================
2121
// Design and Boltzgen parameters
2222
// ========================================================================
23-
// IMPORTANT: The following parameters should be specified in your samplesheet:
23+
// IMPORTANT: The following parameters must be specified in your samplesheet:
2424
// - protocol: Boltzgen protocol (protein-anything, peptide-anything, etc.)
2525
// - num_designs: Number of intermediate designs to generate
2626
// - budget: Number of designs in final diversity-optimized set
2727
//
2828
// This design ensures explicit per-sample control and eliminates ambiguity.
2929
// See samplesheet schema and examples in assets/test_data/ for details.
3030
// ========================================================================
31-
num_designs = null // Number of intermediate designs (typically specified per-sample in samplesheet)
32-
budget = null // Number of final budget designs (typically specified per-sample in samplesheet)
3331

3432
// Boltzgen advanced options
3533
cache_dir = null // Cache directory for model weights (~6GB), defaults to ~/.cache
@@ -41,7 +39,7 @@ params {
4139
mpnn_sampling_temp = 0.1 // Sampling temperature (0.1-0.3 recommended, lower = more conservative)
4240
mpnn_num_seq_per_target = 8 // Number of sequence variants to generate per structure
4341
mpnn_batch_size = 1 // Batch size for ProteinMPNN inference
44-
mpnn_seed = 37 // Random seed for reproducibility
42+
mpnn_seed = 37 // Random seed for reproducibility
4543
mpnn_backbone_noise = 0.02 // Backbone noise level (0.02-0.20, lower = more faithful to input)
4644
mpnn_save_score = true // Save per-residue scores
4745
mpnn_save_probs = false // Save per-residue probabilities (large files, use for detailed analysis)

nextflow_schema.json

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"mimetype": "text/csv",
2323
"pattern": "^\\S+\\.csv$",
2424
"description": "Path to comma-separated samplesheet file.",
25-
"help_text": "The samplesheet must contain: `sample`, `design_yaml`, and optionally: `structure_files`, `protocol`, `num_designs`, `budget`, `reuse`\n\nSee schema file in assets/schema_input_design.json for detailed specifications.",
25+
"help_text": "The samplesheet must contain: `sample_id`, `design_yaml`, `target_sequence`, and optionally: `structure_files`, `protocol`, `num_designs`, `budget`, `reuse`, `target_msa`, `target_template`\n\nSee schema file in assets/schema_input_design.json for detailed specifications.",
2626
"fa_icon": "fas fa-file-csv"
2727
},
2828
"outdir": {
@@ -41,35 +41,6 @@
4141
"default": "",
4242
"fa_icon": "fas fa-cogs",
4343
"properties": {
44-
"protocol": {
45-
"type": "string",
46-
"default": "protein-anything",
47-
"description": "Boltzgen design protocol.",
48-
"help_text": "Available protocols:\n- **protein-anything**: General protein design\n- **peptide-anything**: Peptide binder design\n- **protein-small_molecule**: Protein-small molecule interaction\n- **nanobody-anything**: Nanobody design",
49-
"enum": [
50-
"protein-anything",
51-
"peptide-anything",
52-
"protein-small_molecule",
53-
"nanobody-anything"
54-
],
55-
"fa_icon": "fas fa-flask"
56-
},
57-
"num_designs": {
58-
"type": "integer",
59-
"default": 100,
60-
"description": "Number of intermediate designs to generate.",
61-
"help_text": "For production runs, recommend 10,000-60,000 designs. Lower values (100-1000) suitable for testing.",
62-
"fa_icon": "fas fa-hashtag",
63-
"minimum": 1
64-
},
65-
"budget": {
66-
"type": "integer",
67-
"default": 10,
68-
"description": "Number of designs in final diversity-optimized set.",
69-
"help_text": "Boltzgen will select this many diverse, high-quality designs from the intermediate pool.",
70-
"fa_icon": "fas fa-star",
71-
"minimum": 1
72-
},
7344
"cache_dir": {
7445
"type": "string",
7546
"format": "directory-path",

workflows/protein_design.nf

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ include { CONSOLIDATE_METRICS } from '../modules/local/consolidate_metrics'
1919
workflow PROTEIN_DESIGN {
2020

2121
take:
22-
ch_input // channel: [meta, design_yaml, structure_files, target_msa, target_sequence]
22+
ch_input // channel: [meta, design_yaml, structure_files, target_msa, target_sequence, target_template]
2323
ch_cache // channel: path to cache directory or EMPTY_CACHE placeholder
2424
ch_boltz2_cache // channel: path to Boltz-2 cache directory or EMPTY_BOLTZ2_CACHE placeholder
2525

@@ -29,9 +29,9 @@ workflow PROTEIN_DESIGN {
2929
// Run Boltzgen on design YAMLs
3030
// ========================================================================
3131

32-
// Prepare Boltzgen input by removing target_msa and target_sequence (not needed for Boltzgen)
32+
// Prepare Boltzgen input by removing target_msa, target_sequence, and target_template (not needed for Boltzgen)
3333
ch_boltzgen_input = ch_input
34-
.map { meta, design_yaml, structure_files, target_msa, target_sequence ->
34+
.map { meta, design_yaml, structure_files, target_msa, target_sequence, target_template ->
3535
[meta, design_yaml, structure_files]
3636
}
3737

@@ -86,7 +86,7 @@ workflow PROTEIN_DESIGN {
8686
if (params.run_boltz2_refold) {
8787
// Get target sequence FASTA from samplesheet
8888
ch_target_fasta = ch_input
89-
.map { meta, design_yaml, structure_files, target_msa, target_sequence ->
89+
.map { meta, design_yaml, structure_files, target_msa, target_sequence, target_template ->
9090
[meta.id, target_sequence]
9191
}
9292

@@ -113,11 +113,20 @@ workflow PROTEIN_DESIGN {
113113
// Prepare Target MSA from Samplesheet
114114
// ================================================================
115115
ch_target_msa = ch_input
116-
.map { meta, design_yaml, structure_files, target_msa, target_sequence ->
116+
.map { meta, design_yaml, structure_files, target_msa, target_sequence, target_template ->
117117
def msa_file = target_msa ?: file('NO_MSA')
118118
[meta.id, msa_file]
119119
}
120120

121+
// ================================================================
122+
// Prepare Target Template from Samplesheet
123+
// ================================================================
124+
ch_target_template = ch_input
125+
.map { meta, design_yaml, structure_files, target_msa, target_sequence, target_template ->
126+
def template_file = target_template ?: file('NO_TEMPLATE')
127+
[meta.id, template_file]
128+
}
129+
121130
// ================================================================
122131
// Create channel for Boltz-2 refolding
123132
// ================================================================
@@ -156,7 +165,11 @@ workflow PROTEIN_DESIGN {
156165
}
157166
.combine(ch_target_msa, by: 0)
158167
.map { parent_id, meta, fasta, target_seq, target_msa ->
159-
[meta, fasta, target_seq, target_msa]
168+
[meta.parent_id, meta, fasta, target_seq, target_msa]
169+
}
170+
.combine(ch_target_template, by: 0)
171+
.map { parent_id, meta, fasta, target_seq, target_msa, target_template ->
172+
[meta, fasta, target_seq, target_msa, target_template]
160173
}
161174

162175
// Run Boltz-2 structure prediction with target MSA and optional target template

0 commit comments

Comments
 (0)