Skip to content

Commit d2e05d3

Browse files
committed
Fixed parallelization for Boltz2 folding after ProteinMPNN.
1 parent 8f282d1 commit d2e05d3

File tree

11 files changed

+201
-85
lines changed

11 files changed

+201
-85
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
>Query|-|Query
2+
ICLQKTSNQILKPKLISYTLGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCTH
3+
>UniRef100_A0A8T5UKK0 718 0.821 5.691E-231 0 410 413 5 418 433
4+
ICLQKTKDPILKPKLISYTLpvqQDRGICITDPLLAIDDGFFAYSHLEGIGSCKRGDSKQRIIGVGEVLDRGDYVPSLFMTNVWTPPNPMTVIHCSPVYCNQFYYVLCAVSNVGDPILNSTYWSDSLYITRLAVRPKSKKGPYNQHYIAINMTEDGRYDKVMPYGPSGIKQGDTLYFPAVGFLNRTEFTYNDSNCPITQCGYSKKENCRLSMGSPTNSHYILRSGLLKYNLSNGTDFKIQFIEITDNRLSIGSPSKIYKSLGQPVFYQASMSWDTMIKAGDVETVKPLTVNWRNNTVISRPGQSQCPRFNKCPEICWEGTYNDAFLIDRGNWMSAGVILDSNQTAENPVFTVFKDNEILYKERLAKDDTNAQKTITSCFLLLNEIWCISLVEIYDTGDKVIRPKLFAVKIPKQC--
5+
>UniRef100_UPI0005CF1739 700 0.805 8.121E-225 0 407 413 187 597 603
6+
ICLQKTKDPILKPKLISYTLpvqQDRGICITDPLLAIDDGFFAYSHLEKIGSCKRGDSKQRIIGVGEVLDRGDYVPSLFMTNVWTPPNPMTVIHCSPVYCNQFYYVLCAVSNVGDPILNSTYWSDSLYITRLAVRPKNGIDRYNQKKIAINNMEDGKYDKVMPYGPSGIKQGDTLYFPAVGFLNRTEFTYNDSNCPIIQCGYSKKENCRLSMGSPTNSHYILRSGLLKYNLSNMTDFKIQFIEITDNRLSIGSPSKIYKSLGQPVFYQASMSWDTMIKAGDVETVKPLTVNWRNNTVISRPGQSQCPRFNKCPEVCWEGTYNDAFLIDRGNWMSAGVILDSNQTAENPVFTVFKDNEILYKERLAKDDTNAQKTITSCFLLLNEIWCISLVEIYDTGDKVIRPKLFAVKIP-----
7+
>UniRef100_A4P076 207 0.279 1.647E-54 25 410 413 36 415 454
8+
-------------------------CILNPRLTISSTKFAYVHSEYDKNCTRGFKYYELMTFGEILEGPEKEPRMFSRSFYSPTNAVNYHSCTPIVTVNEGYFLCLECTSSDPLYKANLSNSTFHLVILRHNKDEKIVSMPSFNLSTDQ----EYVQIIPAEGGGTAESGNLYFPCIGRLLHK--RVTHPLCKKSNCSRTDDESCLKSYYNQGSPQHQVVNCLIRIRNAQRDNPTWDVITVDLTNTYPGSRSRIFGSFSKPMLYQSSVSWHTLLQVAEITDLDKYQLDWLDTPYISRPGGSECPFGNYCPTVCWEGTYNDVYSLTPNNDLFVTVYLKSEQVAENPYFAIFSRDQILKEFPLDAWISSARTTTISCFMFNNEIWCIAALEITRLNDDIIRPIYYSFWLPTDC--
9+
>UniRef100_UPI001362B71E 207 0.279 1.647E-54 25 410 413 231 610 632
10+
-------------------------CILNPRLTISSTKFAYVHSEYDKNCTRGFKYYELMTFGEILEGPEKEPRMFSRSFYSPTNAVNYHSCTPIVTVNEGYFLCLECTSSDPLYKANLSNSTFHLVILRHNKDEKIVSMPSFNLSTDQ----EYVQIIPAEGGGTAESGNLYFPCIGRLLHK--RVTHPLCKKSNCSRTDDESCLKSYYNQGSPQHQVVNCLIRIRNAQRDNPTWDVITVDLTNTYPGSRSRIFGSFSKPMLYQSSVSWHTLLQVAEITDLDKYQLDWLDTPYISRPGGSECPFGNYCPTVCWEGTYNDVYSLTPNNDLFVTVYLKSEQVAENPYFAIFSRDQILKEFPLDAWISSARTTTISCFMFNNEIWCIAALEITRLNDDIIRPIYYSFWLPTDC--
11+
>UniRef100_A0A4T0A5B2 133 0.235 6.916E-30 23 410 413 183 566 854
12+
-----------------------GGCMRDPSFDIGQKIFAYTHNVVDQGCQNEQQSTQYWSIGRITDVADDMPKFETLTQWYLNDGLNRKSCSVAVIDYGAWMLCIIMTESEEDDYQSPGIGRLFIGYMDIYGRKKSWILDESEISFDYKYAALYFSV----GSGIVSKGKVYFLVYGGL--TNPVSGNVMCHAPGCENPNQDVCNNASKPKSFGNRQMVNGILSFDDNPSEKPRLTVKTIPPSQNWFGAEGRLYKSyyTRNTFIYIRSSSWHALPQIGIINLTDDTYTQWVDNVAISRPGTNGCPFGNRCPKECVTGVYTDLFPLDSNYQYAITVTLKSQNTFVNPVLMVVNQTKIIYEKTVTTADQQAHYTTTTCFKFSRSLWCLSIVELEPGTVGERQPVPFLYKLPLEC--
13+
>UniRef100_A0A6M1UA71 127 0.216 6.125E-28 23 410 413 191 574 1602
14+
-----------------------GSCVRLPSFSLSSTIFAYTHTIMGHGCSELDVGDHYFSIGRIADHGHDQPVFETITEWFINDKINRRSCTVAAGKYEAWMGCVImtETFYDDMMSED--TGKVSISYLDVYGRKREWMYSRSEIRYDYNYASLYFSI----GSGVVIGDNVYFLVWGSLMYP--IEQNAYCNAPGCRNWTQQMCNQAQRPQTFGNHQMVNGILSFKTNTDGKPVLSVRTFTPGLIPLGTEGRLiyFENTNKTYIYLRSTTWHALPLTGEITFGPPLAIRWFQQTAVSRPGDAPCGASNRCPRQCVTGVYTDLFPLGTNYEYSMTAYLDSETRRVNPTLAFINTNSIIYEKTLTNSTQRAEYTTTTCFTFKLRIWCLSIVELSPSTITSFEPVPFLYQLDVGC--
15+
>UniRef100_A0A812SP11 123 0.243 1.621E-26 23 411 413 181 561 562
16+
-----------------------GGCVRIPSFSLT--IYAYTHNLITQGCQDIGKSYQYWQIGYITTNSDGVPEPNPSHTWDINDGMNRKSCSTAASGTGAWLGCSIPTVDE---RDDYASPGI--EDIVLDYQDIFGRRKSWRYTNSEIDFDRpYAALYPSVGSGIVVKGKVYFLGYGGLMHP--IQGNAYCNAPGCPSPTQEDCNKAQKPPWFGGRQIVNGILTFDDTGNDKPTLRVRTIPPTQNWMGAEGRLLYLGGKIYIYTRSTSWHSLLQIGIIDLGDPIRINWVPHTVLSRPGNDPCGWGNRCPKGCLTGVYTDAYPLSPDYSLVATVILNSQTSRVNPTITYSTPTEIVNMKQITNNTQEAAYTTTTCFTHFNKGWCFSIVEINPGTLNTYQPVLFKTEIPKSCS-
17+
>UniRef100_A0A7W3N9H6 123 0.229 2.181E-26 23 410 413 191 574 1052
18+
-----------------------GSCVRLPSFSLSSTVFAYTHTIMGHGCSELDVGDHYFAVGRIADAGHEIPQFETISSWFINDKINRRSCTVAAGAMEAWMGCVIMTETFYDDLDSLDTGKLTISYLDVFGRKKEWIYTRSEILYDYTYTSVYFSI----GSGVVVGDTVYFLIWGSLSSpiEETAY----CFAPGCSNYNQRMCNEAQRPSKFGHRQMVNGILRFKTTSTGKPLLSVRTLSPSVIPFGTEGRlIYSDITKIIYlYLRSTSWHALPLTGLLVLGPPTSISWVTQEAVSRPGEYPCGASNRCPKDCVTGVYTDLFPLGARYEYAATVYLNAETYRVNPTLALINQTSIIARKKITTESQKAGYTTTTCFVFKLRVWCVSVVELAPATMTAYEPIPFLYQLDLTC--
19+
>UniRef100_A0A936IHT9 112 0.238 6.427E-23 12 401 413 65 449 450
20+
------------PSLLSMPTTIDG-CVRTPSLSINDAIYAYtSNLITQGCQDIGKSYQ-VLQIGYITVNSDMYPDLNPRISHTYNINDNRKSCSVIAANTDVYQLCSTPTVNERTDYSSSGIEDIVFDIVNYDGSTSTHRYKNNNITFDHPYSAMYPSVGP----GIYYKNTIIFLGYGGLEHP--IQENTKCNTNRCPNKNQRDCNQASHSPWFSDRQMVNSIIRINNYLNSRPRIKVWTIPMRQNYWGSEGRLL-KLGNKIYiYTRSTSWHSNLQIGsiDINDYNPMRINWTWHNVLSRPGNPECPWFHSCPDECITGVYTDAYPLNPTGSNVSTTTLYSQTSRVNPTIMYSTTTERINMLRIRNKTLPAGYTTTSCITHYNKGYCFHIVEINHISLNTFQPML-----------
21+
>UniRef100_UPI00061BAB72 111 0.266 1.554E-22 154 411 413 29 284 288
22+
----------------------------------------------------------------------------------------------------------------------------------------------------------YSAMYPSVGPGIYYENTIIFLGYGGL--THPIQENTKCNTTSCPNKNQRDCNQASHSPWFSDRQMVNSIIHINNYLNDRPKIKVWTIPMTQNYWGSEGRLL-KLGNKIYiYTRSTSWHSNLQIGSIDINSPMTINWTWHNVLSRPGNPDCPWFHSCPRECITGVYTDAYPLNPTGFNVSTTTLYSQTSRVNPTIMYSTTTERINMLRIRNKQLEAGYTTTSCITHYNKGYCFHIVEINHTSLNTFQPMLFKTEIPKSCS-
23+
>UniRef100_UPI0011440D73 103 0.270 5.458E-20 154 410 413 73 331 363
24+
----------------------------------------------------------------------------------------------------------------------------------------------------------YTALYPSVGSGVSIGTTVYFLMYGGL---ETPHNDqSYCPRHMCndQTKNQQQCNKAQQPNWRSYKQMVNAIMSFNDQIESQPNFTIRTIPPSTQWMGSEGRLmYNWYSQKNYiYTRSTSWYPKMQFGFINLTPPHTIKWQNYTSISRPGERVCKSNNRCPAECVTGVYTDAYPLTSQDDLAMTVEHNDDYQRRSPTMKSVTPNNKTEQYDIYNNTQNADYTTTTCFMFNFQPWCISIVEMPPATINSMMPTPYLYPIWSNC--
25+
>UniRef100_F4LQG9 102 0.227 7.307E-20 25 410 413 208 589 930
26+
-------------------------CARIPTFSLSPYIWAYGHnVIRNGCADHGMSDQ-YFSIGVISESWGETPHFETLTSWYMDDETNRKSCSVAAGVSGAWMGC---TVVWQSFRDDYCSDGILPLHLSymdIFGRKRYWTYDPRVLGFASRFAAFYFGV----GSGVIVDGVVYLPFYAGL--ADNLTQSSFCHAPDCNNPQTSECDSAAQLVWLCSKVIVNGILYFNDDPIVRPVLRVAVINTRSNWLGAEMRLIHNyqLGITYIYTRSSGWHALPQVGLINLQNIAQVVWIDVTAIGRPGRDTCSAGSRCPSTCLSGVYNDIFPLGRYYEFGATVYLQSDTDRVHPTIAFLNTTRVFESMTLTTAEQRAQYSTTTCFVFKSKPWCLSIVEMEPSVVGTTTPVPFTYTLPLVC--
27+
>UniRef100_M0DWW1 96 0.232 5.743E-18 22 410 413 52 440 471
28+
----------------------SDECYTNPSFSIGSSIYMFSQEIRKTDCTAGEILSIQIVLGRIVDKGQQGPQASPLLVWAVPNPKIINSCAVAAGDEMGWVLCSVTLTaasGEPIphMFDGFWLYKLEPDTEVVSYRITGYAY--------LLDK-QYDSVFIGKGGGIQKGNDLYFQMYGLsRNRQSFK---ALCEHGSCLGTGGGGyqvlCDRAVMSFGSEESLITNAYLKVNDLASGKPVIIGQTFPPSDSYKGSNGRMYTIGDKYGLYLAPSSWNRYLRFGITPDISVRSTTWLKSQDPIMKILSTCTNTdrDMCPEICNTRGYQDIFPLseDSEYYTYIGITPNNGGTKN---FVAVRDSD----GHIASIDIlqnyySITSATISCFMYKDEIWCIAITEGKKQKDNPQRIYAHSYKIRQMC--
29+
>UniRef100_UPI000FE14678 96 0.232 5.743E-18 22 410 413 215 603 625
30+
----------------------SDECYTNPSFSIGSSIYMFSQEIRKTDCTAGEILSIQIVLGRIVDKGQQGPQASPLLVWAVPNPKIINSCAVAAGDEMGWVLCSVTLTaasGEPIphMFDGFWLYKLEPDTEVVSYRITGYAY--------LLDK-QYDSVFIGKGGGIQKGNDLYFQMYGLsRNRQSFK---ALCEHGSCLGTGGGGyqvlCDRAVMSFGSEESLITNAYLKVNDLASGKPVIIGQTFPPSDSYKGSNGRMYTIGDKYGLYLAPSSWNRYLRFGITPDISVRSTTWLKSQDPIMKILSTCTNTdrDMCPEICNTRGYQDIFPLseDSEYYTYIGITPNNGGTKN---FVAVRDSD----GHIASIDIlqnyySITSATISCFMYKDEIWCIAITEGKKQKDNPQRIYAHSYKIRQMC--
31+
>UniRef100_A0A8J2V3C9 78 0.266 4.214E-12 105 344 413 14 238 239
32+
---------------------------------------------------------------------------------------------------------ASSGIEDIVLDIVNYDGSISTTRF----KNNNISFDQPYAAL-------YPSVGP----GIYYKGKIIFLGYGGL---EHPINENvICNTTECPGKTQRDCNQASHSPWFSDRRMVNSIIVVDKGLNSIPKLKVWTISMRQNYWGSEGRLL-LLGNKIYiYTRSTSWHSKLQLGiiDITDYSDIRIKWTWHNVLSRPGNNECPWGHSCPNGCITGVYTDAYPLNPTGSIVSSVILDSQKSRVNP--------------------------------------------------------------------
33+
>UniRef100_UPI00203C3497 76 0.226 1.748E-11 153 395 413 401 644 665
34+
---------------------------------------------------------------------------------------------------------------------------------------------------------QYAALYPATGPGIFIGDHLVFLMWGGLMTK--AEGDAYCQASGCNDAHRTSCNIAQMPSAYGHRQLVNGLLMLPIKElGSHLIQPSLETISPKINwAGGHGRLYYNweINTTYIYIEGKTWRSRPNLGIISWSKPLSIRWIDHSVARRPGARPCDSANDCPEDCLVGGYYDMFPMSSDYKTAITIIPTHHQWPSSPALKLFNTNREVRVVMILRPPNNVKKTTISCIRIMQTNWCLGFI-IFKEGNN-----------------
35+
>UniRef100_A0A4R9BG17 72 0.226 2.250E-10 5 411 413 218 623 625
36+
-----TANPKRVPIFVSNVGGMlENSCTKEPVISMANGVFASTYLYLRDSCTDYQSSIRFFEMGIVKRLSDNDPYLSVIHTWDQASPFVLQPCSLAVAYDNGYALCAESVTG---VDNDLVTGNTI--RLVLFTFTLFGSLERKVIYYENFKRPReFVYIIPGAGQGVIIDNVMY--SIGYYVSENTPQGNLKCPTTGCPNLQYSTCDQFSRTQVSNHRHKFLTLIQVNLTQYPLPVHNLLVIPRSYYSIISHGNLYyrNSNDSVLFQLYNVGWYHKPLVGSINLTTPLSLEFLNKDYDLLSSVTNCvPGFG-CPSSCEISAYgaytpldynfNDAVsLIPRTSGAYPSVSYGSGNTR--------IDFRIILNQQLALRESSLV-----CYLptIQNtgHPYCVGLMTFEVTGQTA--PQLYSVgwKQTYQCS-
37+
>UniRef100_UPI00227ACD99 66 0.228 2.755E-08 25 391 413 188 574 595
38+
-------------------------CTRIPSFSLSKTHWCYSHNIIASGCqDHGHSSQYIsMGVLQVTSNG--TPSFRTTASQYLSDGLNRKSCSIIATPYGCDLLCSVVTetenddyASDPptemILGRLFFNGT--YTERVINPPGFFGDWVANYPGVGSgVVYGGKILFPIYG--GVKQNTSLFNQLSG-------KYFFPHNPKYPCSNSTQQQIQRA----KNSYYppkfsgrLWQQGILICPLSQFLTTDCRIKVFNNSTVMMGAEGRLYLIGNNLYYYQRSSSWwpvgllyKLSLNFSNSV---PSITNikWIPIYQFPRPGSGPCTGPNVCPAVCVTGVYQDIWPLSNpstanpnlSNIVWVGQYLNAPTARKNP--TIYIANQYSWKNQvrLFNSNTEAAYSTTTCF--KNtgtdRVYCLIIIELGD---------------------
39+
>UniRef100_A0A7J8PBV8 59 0.236 2.495E-06 152 325 413 101 290 302
40+
--------------------------------------------------------------------------------------------------------------------------------------------------------GQWATINPAVGSGIYHPGFILFPVYGGLINGTTSYNEQSSryfipkhPNITCagnSSTQAAIARSSYVIRYHSNRLIQSAVLICPLSDMHTEECNLVMFNNSQVMMGAEGRLYVIGNNLYYYQRSSSWWSASLFYRINTdfskgIPPIIeAQWVPSYQVPRPGVMPCNATSFCPANCITGVYADVWPLNN---------------------------------------------------------------------------------------
41+
>UniRef100_UPI0002A41B43 59 0.274 4.377E-06 230 409 413 382 569 571
42+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GKDP---VLSVLDNNVTLmGAEARVMTIGGKLYLYQRGTSW-----FPSAL-LYPLTITNGTATFSSpyifdnftRPGSHPCSAASRCPNSCVTGVYTDAYpLVFSRNHKVRGVYgmmLNDRTARLNPVAAVFFRISMSNVTRVSSSPTKAAYTTSTCFkvVKTGRVYCISIAEIGNTlfGEFRIVPLLVEILSDER---
43+
>UniRef100_A0A0Q4Q434 56 0.196 2.361E-05 25 405 413 55 457 468
44+
-------------------------CSRFPSYSNHYGLWCYSHTVSNDTCDGSNPSVQILSVGKLITGDNGQPEHKTLYTQQLSQTDRLYHCSVTMTTLGCYILCS-----KPRVNETQDYETIGIEPMIIGMLGLDGVYTDLGNPVGISDNSLY--AMYPGPgGGVMYKDFLVFPLHGGVRFSEaskmlgknitfrgFPPSDTCTEHEKSLTQEPANMLTSPYY---GEVLVLDFLYVCTLLDNIPGECSIQLIPPDNMTMGSESKLYKLNNSLLLYKRSSSWwpyTEVYQLSLRVSKNSMKVRESvrlNITSTTRPGVEGCNINKVCPKVCVTGVFQAPGIIRKAlspkesneDLLFFQAWTSDSIARQGPLISLCRADSCVLTIPLGNSDVFIGYTDSFCLSDRDneKIYCVALLELDNMpySEMTIRSFLYLIK-------
45+
>UniRef100_UPI001F11728C 56 0.219 2.361E-05 20 389 413 173 564 587
46+
--------------------GKSKGCTRFPSFSVYFGFWCYTHAVSDQNCEGSSPTYQRVRIGIIkknLSDGSPYKTLGTT---TLPRGNRRRGCSVTSSIYGCYLLCSKPNVSETDDYKT--QGIEPMTILFL---SRDGITTDLFDNIQSTTE-SWNALYPGEGSGVWHMGYLIFPLWGGIpFKTPFAENIWNITLrgfpvgPSCKETLPDKFNLGNkdSVLFSPYYgenVMVFGLLVcYMLENLPGHCQVQI-LNPSNLTMGSESQLYVLNGILYLYQRSASW---WPYTQLYRLNLRSTNRKLRvrsiiripiTSTTRPGYEGCNIYKVCPKVCVTGVFQAPWIInidsirdrDVRNLLFFQAWSGDFNTRQGPLVSLCSQDTCPLTTPLANSKSHMGYTTTYCYpsRSENKLYCTVFIEL-----------------------
47+
>UniRef100_A0A7W4H6B0 55 0.252 7.253E-05 191 309 413 1 119 120
48+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CQAPECTSATQESCNSNQLIGYFSGRQIVNCIIEIITVGTEKPIIRIRTIPNSQVWLGAEGRIQTLGGVLYLYIRSSGWHALAQTGIILTLDPIRISWIENTGYSRPGNRPCPASSRCP-------------------------------------------------------------------------------------------------------
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
sample_id,design_yaml,structure_files,protocol,num_designs,budget,reuse,target_msa
2-
2vsm_protein_binder,assets/test_data/2VSM_protein_design.yaml,assets/test_data/2VSM.cif,protein-anything,3,2,,assets/test_data/nipah_glycoprotein_msa_Uniref30_2302.a3m
2+
2vsm_protein_binder,assets/test_data/2VSM_protein_design.yaml,assets/test_data/2VSM.cif,protein-anything,3,2,,assets/test_data/2VSM_seq.Uniref30_2302.a3m

bin/prepare_boltz2_input.py

Lines changed: 28 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def parse_args():
1414
parser.add_argument('--parent_id', required=True, help='Parent ID')
1515
parser.add_argument('--predict_affinity', action='store_true', help='Enable affinity prediction')
1616
parser.add_argument('--output_dir', default='yaml_inputs', help='Directory to save YAML files')
17+
parser.add_argument('--treat_as_designed', action='store_true', help='Treat the first sequence as a designed sequence (do not skip)')
1718
return parser.parse_args()
1819

1920
def main():
@@ -65,81 +66,57 @@ def main():
6566

6667
print(f"Found {len(sequences)} sequences in {fasta_file}")
6768

68-
# Skip the first sequence (it's always the original sequence from Boltzgen)
69-
# We only want to refold the NEW sequences generated by ProteinMPNN
70-
sequences_to_process = sequences[1:] if len(sequences) > 1 else []
69+
# Determine which sequences to process
70+
if args.treat_as_designed:
71+
# If treating as designed, process ALL sequences (including the first one)
72+
sequences_to_process = sequences
73+
print(f"Processing all {len(sequences_to_process)} sequences (treating first as designed)")
74+
else:
75+
# Default behavior: Skip the first sequence (original from Boltzgen)
76+
sequences_to_process = sequences[1:] if len(sequences) > 1 else []
77+
print(f"Processing {len(sequences_to_process)} new MPNN sequences (skipping original)")
7178

7279
if not sequences_to_process:
73-
print(f"⚠ Warning: Only found 1 sequence (original), no new MPNN sequences to refold")
80+
print(f"⚠ Warning: No sequences to process in {fasta_file}")
7481
continue
7582

76-
print(f"Processing {len(sequences_to_process)} new MPNN sequences (skipping original)")
77-
78-
# Create Boltz-2 YAML for each NEW sequence (skip first one)
83+
# Create Boltz-2 YAML for each sequence
7984
for idx, (header, binder_seq) in enumerate(sequences_to_process):
8085
# Create YAML input for Boltz-2
8186
# Format: binder (designed sequence) + target (original protein)
8287
# Note: Only the target gets an MSA; the binder is set to msa: 'empty', which runs it in single-sequence mode (no MSA is inferred for it)
88+
# Keep only the first chain of the binder sequence: everything before the first '/' is used and any further chains are discarded
89+
binder_seq_clean = binder_seq.split('/')[0] if '/' in binder_seq else binder_seq
8390
binder_entry = {
8491
'protein': {
85-
'id': 'BINDER',
86-
'sequence': binder_seq,
92+
'id': 'A',
93+
'sequence': binder_seq_clean,
8794
'msa': 'empty'
8895
}
8996
}
9097

9198
target_entry = {
9299
'protein': {
93-
'id': 'TARGET',
100+
'id': 'B',
94101
'sequence': target_seq
95102
}
96103
}
97-
98-
# Add target MSA if available (binder MSA will be inferred by Boltz-2)
99104
if has_target_msa and target_msa_path:
100105
target_entry['protein']['msa'] = os.path.abspath(target_msa_path)
101106
print(f" Adding target MSA: {target_msa_path}")
102-
103-
# Check for multi-chain sequence (ProteinMPNN uses / separator)
104-
if '/' in binder_seq:
105-
# Multi-chain case: ProteinMPNN output includes all chains
106-
# We split them and create separate entities
107-
parts = binder_seq.split('/')
108-
seq_list = []
109-
for i, part in enumerate(parts):
110-
# Use simple IDs: A, B, C...
111-
chain_id = chr(65+i)
112-
seq_list.append({
113-
'protein': {
114-
'id': chain_id,
115-
'sequence': part,
116-
'msa': 'empty'
117-
}
118-
})
119-
# Add the target entry (with MSA if available) to the sequences list
120-
seq_list.append(target_entry)
121-
122-
boltz2_input = {
123-
'version': 1,
124-
'sequences': seq_list
125-
}
126-
print(f" Detected multi-chain sequence ({len(parts)} chains)")
127-
128-
else:
129-
# Single chain case: Binder + Target
130-
boltz2_input = {
131-
'version': 1,
132-
'sequences': [binder_entry, target_entry]
133-
}
134-
135-
# Add affinity prediction property (only for single binder case)
136-
# Note: Boltz-2 currently only supports affinity for ligands, so this might fail for proteins
137-
if args.predict_affinity:
138-
boltz2_input['properties'] = [
139-
{'affinity': {'binder': 'BINDER'}}
140-
]
107+
# Build final YAML input with exactly two entries
108+
boltz2_input = {
109+
'version': 1,
110+
'sequences': [binder_entry, target_entry]
111+
}
112+
# Add affinity prediction property for binder chain 'A' (note: Boltz-2 affinity currently targets ligand binders, so this might fail for a protein binder)
113+
if args.predict_affinity:
114+
boltz2_input['properties'] = [
115+
{'affinity': {'binder': 'A'}}
116+
]
141117

142118
# Write YAML file
119+
# Use yaml_count (not the loop index idx) as the filename suffix to avoid overwriting earlier YAML files
143120
yaml_file = f"{args.output_dir}/{output_base}_seq_{yaml_count}.yaml"
144121
with open(yaml_file, 'w') as yf:
145122
yaml.dump(boltz2_input, yf, default_flow_style=False)

conf/base.config

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,4 +113,10 @@ process {
113113
// Container GPU access for Docker
114114
containerOptions = '--gpus all'
115115
}
116+
117+
withName:BOLTZ2_REFOLD {
118+
accelerator = { check_max( 1, 'gpus' ) }
119+
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
120+
containerOptions = '--gpus all -e TORCH_FLOAT32_MATMUL_PRECISION=medium'
121+
}
116122
}

conf/test_design_nanobody.config

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,6 @@ params {
1818
config_profile_name = 'Test profile - Design Mode (Nanobody)'
1919
config_profile_description = 'Test dataset for design mode using 2VSM with pre-made nanobody YAML spec'
2020

21-
// Limit resources for fast testing
22-
max_cpus = 2
23-
max_memory = '6.GB'
24-
max_time = '6.h'
25-
max_gpus = 1
26-
2721
// Input data - design mode with 2VSM nanobody
2822
input = "${projectDir}/assets/test_data/samplesheet_design_nanobody.csv"
2923
mode = 'design'

conf/test_design_peptide.config

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,6 @@ params {
1818
config_profile_name = 'Test profile - Design Mode (Peptide)'
1919
config_profile_description = 'Test dataset for design mode using 2VSM with pre-made peptide YAML spec'
2020

21-
// Limit resources for fast testing
22-
max_cpus = 2
23-
max_memory = '6.GB'
24-
max_time = '6.h'
25-
max_gpus = 1
26-
2721
// Input data - design mode with 2VSM peptide
2822
input = "${projectDir}/assets/test_data/samplesheet_design_peptide.csv"
2923
mode = 'design'

conf/test_design_protein.config

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,6 @@ params {
1818
config_profile_name = 'Test profile - Design Mode (Protein)'
1919
config_profile_description = 'Test dataset for design mode using 2VSM with pre-made protein YAML spec'
2020

21-
// Limit resources for fast testing
22-
max_cpus = 2
23-
max_memory = '6.GB'
24-
max_time = '6.h'
25-
max_gpus = 1
26-
2721
// Input data - design mode with 2VSM protein
2822
input = "${projectDir}/assets/test_data/samplesheet_design_protein.csv"
2923
mode = 'design'

0 commit comments

Comments
 (0)