@@ -176,7 +176,9 @@ def mmseqs_search_pair(
176176 dbbase : Path ,
177177 base : Path ,
178178 uniref_db : Path = Path ("uniref30_2302_db" ),
179+ spire_db : Path = Path ("spire_ctg10_2401_db" ),
179180 mmseqs : Path = Path ("mmseqs" ),
181+ pair_env : bool = True ,
180182 prefilter_mode : int = 0 ,
181183 s : float = 8 ,
182184 threads : int = 64 ,
@@ -200,6 +202,13 @@ def mmseqs_search_pair(
200202 dbSuffix1 = ".idx"
201203 dbSuffix2 = ".idx"
202204
205+ if pair_env :
206+ db = spire_db
207+ output = ".env.paired.a3m"
208+ else :
209+ db = uniref_db
210+ output = ".paired.a3m"
211+
203212 # fmt: off
204213 # @formatter:off
205214 search_param = ["--num-iterations" , "3" , "--db-load-mode" , str (db_load_mode ), "-a" , "-e" , "0.1" , "--max-seqs" , "10000" ,]
@@ -209,16 +218,14 @@ def mmseqs_search_pair(
209218 else :
210219 search_param += ["--k-score" , "'seq:96,prof:80'" ]
211220 expand_param = ["--expansion-mode" , "0" , "-e" , "inf" , "--expand-filter-clusters" , "0" , "--max-seq-id" , "0.95" ,]
212- run_mmseqs (mmseqs , ["search" , base .joinpath ("qdb" ), dbbase .joinpath (uniref_db ), base .joinpath ("res" ), base .joinpath ("tmp" ), "--threads" , str (threads ),] + search_param ,)
213- run_mmseqs (mmseqs , ["expandaln" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ uniref_db } { dbSuffix1 } " ), base .joinpath ("res" ), dbbase .joinpath (f"{ uniref_db } { dbSuffix2 } " ), base .joinpath ("res_exp" ), "--db-load-mode" , str (db_load_mode ), "--threads" , str (threads ),] + expand_param ,)
214- run_mmseqs (mmseqs , ["align" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ uniref_db } { dbSuffix1 } " ), base .joinpath ("res_exp" ), base .joinpath ("res_exp_realign" ), "--db-load-mode" , str (db_load_mode ), "-e" , "0.001" , "--max-accept" , "1000000" , "--threads" , str (threads ), "-c" , "0.5" , "--cov-mode" , "1" ,],)
215- run_mmseqs (mmseqs , ["pairaln" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ uniref_db } " ), base .joinpath ("res_exp_realign" ), base .joinpath ("res_exp_realign_pair" ), "--db-load-mode" , str (db_load_mode ), "--pairing-mode" , str (pairing_strategy ), "--pairing-dummy-mode" , "0" , "--threads" , str (threads ), ],)
216- run_mmseqs (mmseqs , ["align" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ uniref_db } { dbSuffix1 } " ), base .joinpath ("res_exp_realign_pair" ), base .joinpath ("res_exp_realign_pair_bt" ), "--db-load-mode" , str (db_load_mode ), "-e" , "inf" , "-a" , "--threads" , str (threads ), ],)
217- run_mmseqs (mmseqs , ["pairaln" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ uniref_db } " ), base .joinpath ("res_exp_realign_pair_bt" ), base .joinpath ("res_final" ), "--db-load-mode" , str (db_load_mode ), "--pairing-mode" , str (pairing_strategy ), "--pairing-dummy-mode" , "1" , "--threads" , str (threads ),],)
218- run_mmseqs (mmseqs , ["result2msa" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ uniref_db } { dbSuffix1 } " ), base .joinpath ("res_final" ), base .joinpath ("pair.a3m" ), "--db-load-mode" , str (db_load_mode ), "--msa-format-mode" , "5" , "--threads" , str (threads ),],)
219- run_mmseqs (mmseqs , ["unpackdb" , base .joinpath ("pair.a3m" ), base .joinpath ("." ), "--unpack-name-mode" , "0" , "--unpack-suffix" , ".paired.a3m" ,],)
220- run_mmseqs (mmseqs , ["rmdb" , base .joinpath ("qdb" )])
221- run_mmseqs (mmseqs , ["rmdb" , base .joinpath ("qdb_h" )])
221+ run_mmseqs (mmseqs , ["search" , base .joinpath ("qdb" ), dbbase .joinpath (db ), base .joinpath ("res" ), base .joinpath ("tmp" ), "--threads" , str (threads ),] + search_param ,)
222+ run_mmseqs (mmseqs , ["expandaln" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ db } { dbSuffix1 } " ), base .joinpath ("res" ), dbbase .joinpath (f"{ db } { dbSuffix2 } " ), base .joinpath ("res_exp" ), "--db-load-mode" , str (db_load_mode ), "--threads" , str (threads ),] + expand_param ,)
223+ run_mmseqs (mmseqs , ["align" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ db } { dbSuffix1 } " ), base .joinpath ("res_exp" ), base .joinpath ("res_exp_realign" ), "--db-load-mode" , str (db_load_mode ), "-e" , "0.001" , "--max-accept" , "1000000" , "--threads" , str (threads ), "-c" , "0.5" , "--cov-mode" , "1" ,],)
224+ run_mmseqs (mmseqs , ["pairaln" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ db } " ), base .joinpath ("res_exp_realign" ), base .joinpath ("res_exp_realign_pair" ), "--db-load-mode" , str (db_load_mode ), "--pairing-mode" , str (pairing_strategy ), "--pairing-dummy-mode" , "0" , "--threads" , str (threads ), ],)
225+ run_mmseqs (mmseqs , ["align" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ db } { dbSuffix1 } " ), base .joinpath ("res_exp_realign_pair" ), base .joinpath ("res_exp_realign_pair_bt" ), "--db-load-mode" , str (db_load_mode ), "-e" , "inf" , "-a" , "--threads" , str (threads ), ],)
226+ run_mmseqs (mmseqs , ["pairaln" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ db } " ), base .joinpath ("res_exp_realign_pair_bt" ), base .joinpath ("res_final" ), "--db-load-mode" , str (db_load_mode ), "--pairing-mode" , str (pairing_strategy ), "--pairing-dummy-mode" , "1" , "--threads" , str (threads ),],)
227+ run_mmseqs (mmseqs , ["result2msa" , base .joinpath ("qdb" ), dbbase .joinpath (f"{ db } { dbSuffix1 } " ), base .joinpath ("res_final" ), base .joinpath ("pair.a3m" ), "--db-load-mode" , str (db_load_mode ), "--msa-format-mode" , "5" , "--threads" , str (threads ),],)
228+ run_mmseqs (mmseqs , ["unpackdb" , base .joinpath ("pair.a3m" ), base .joinpath ("." ), "--unpack-name-mode" , "0" , "--unpack-suffix" , output ,],)
222229 run_mmseqs (mmseqs , ["rmdb" , base .joinpath ("res" )])
223230 run_mmseqs (mmseqs , ["rmdb" , base .joinpath ("res_exp" )])
224231 run_mmseqs (mmseqs , ["rmdb" , base .joinpath ("res_exp_realign" )])
@@ -230,7 +237,6 @@ def mmseqs_search_pair(
230237 # @formatter:on
231238 # fmt: on
232239
233-
234240def main ():
235241 parser = ArgumentParser (formatter_class = ArgumentDefaultsHelpFormatter )
236242 parser .add_argument (
@@ -271,11 +277,15 @@ def main():
271277 default = Path ("colabfold_envdb_202108_db" ),
272278 help = "Environmental database" ,
273279 )
280+ parser .add_argument ("--db4" , type = Path , default = Path ("spire_ctg10_2401_db" ), help = "Environmental pairing database" )
274281
275282 # poor man's boolean arguments
276283 parser .add_argument (
277284 "--use-env" , type = int , default = 1 , choices = [0 , 1 ], help = "Use --db3"
278285 )
286+ parser .add_argument (
287+ "--use-env-pairing" , type = int , default = 0 , choices = [0 , 1 ], help = "Use --db4"
288+ )
279289 parser .add_argument (
280290 "--use-templates" , type = int , default = 0 , choices = [0 , 1 ], help = "Use --db2"
281291 )
@@ -418,7 +428,22 @@ def main():
418428 db_load_mode = args .db_load_mode ,
419429 threads = args .threads ,
420430 pairing_strategy = args .pairing_strategy ,
431+ pair_env = False ,
421432 )
433+ if args .use_env_pairing :
434+ mmseqs_search_pair (
435+ mmseqs = args .mmseqs ,
436+ dbbase = args .dbbase ,
437+ base = args .base ,
438+ uniref_db = args .db1 ,
439+ spire_db = args .db4 ,
440+ prefilter_mode = args .prefilter_mode ,
441+ s = args .s ,
442+ db_load_mode = args .db_load_mode ,
443+ threads = args .threads ,
444+ pairing_strategy = args .pairing_strategy ,
445+ pair_env = True ,
446+ )
422447
423448 id = 0
424449 for job_number , (
@@ -434,6 +459,14 @@ def main():
434459 with args .base .joinpath (f"{ id } .a3m" ).open ("r" ) as f :
435460 unpaired_msa .append (f .read ())
436461 args .base .joinpath (f"{ id } .a3m" ).unlink ()
462+
463+ if args .use_env_pairing :
464+ with open (args .base .joinpath (f"{ id } .paired.a3m" ), 'a' ) as file_pair :
465+ with open (args .base .joinpath (f"{ id } .env.paired.a3m" ), 'r' ) as file_pair_env :
466+ while chunk := file_pair_env .read (10 * 1024 * 1024 ):
467+ file_pair .write (chunk )
468+ args .base .joinpath (f"{ id } .env.paired.a3m" ).unlink ()
469+
437470 if len (query_seqs_cardinality ) > 1 :
438471 with args .base .joinpath (f"{ id } .paired.a3m" ).open ("r" ) as f :
439472 paired_msa .append (f .read ())
0 commit comments