@@ -72,6 +72,352 @@ def assess_prepped_protein(output_directory, input_openeye_du):
7272 with open (output_directory / f"{ stem } _quality_report.json" , "w" ) as f :
7373 json .dump (report , f , indent = 4 )
7474
75+ # Prep for docking
76+ from typing import TYPE_CHECKING , Optional
# NOTE: the click option helpers defined below were copied over in bulk.
# TODO: delete every helper below that this module does not actually use.
79+
def postera(func):
    """Attach the ``--postera`` boolean flag (off by default) to ``func``.

    Args:
        func: The callable to decorate with the click option.

    Returns:
        The result of applying the ``click.option`` decorator to ``func``.
    """
    add_option = click.option(
        "--postera",
        is_flag=True,
        default=False,
        help="Whether to download complexes from Postera.",
    )
    return add_option(func)
87+
88+
def postera_molset_name(func):
    """Attach the ``--postera-molset-name`` string option to ``func``.

    The option defaults to ``None`` when it is not supplied.
    """
    add_option = click.option(
        "--postera-molset-name",
        type=str,
        default=None,
        help="The name of the Postera molecule set to use.",
    )
    return add_option(func)
96+
97+
def postera_upload(func):
    """Attach the ``--postera-upload`` boolean flag (off by default) to ``func``."""
    add_option = click.option(
        "--postera-upload",
        is_flag=True,
        default=False,
        help="Whether to upload results to Postera.",
    )
    return add_option(func)
105+
106+
def postera_args(func):
    """Apply all three Postera options to ``func`` in one go.

    The decorators are applied innermost-first: ``postera_upload``, then
    ``postera_molset_name``, then ``postera``.
    """
    for decorate in (postera_upload, postera_molset_name, postera):
        func = decorate(func)
    return func
109+
110+
def use_dask(func):
    """Attach the ``--use-dask`` boolean flag (off by default) to ``func``."""
    add_option = click.option(
        "--use-dask",
        is_flag=True,
        default=False,
        help="Whether to use dask for parallelism.",
    )
    return add_option(func)
118+
119+
def dask_type(func):
    """Attach the ``--dask-type`` choice option to ``func``.

    The accepted values come from ``DaskType.get_values()`` and are matched
    case-insensitively; the default is ``DaskType.LOCAL``.

    Args:
        func: The callable to decorate with the click option.

    Returns:
        The result of applying the ``click.option`` decorator to ``func``.
    """
    add_option = click.option(
        "--dask-type",
        type=click.Choice(DaskType.get_values(), case_sensitive=False),
        default=DaskType.LOCAL,
        # Spelling of "recommended" corrected in this user-facing help text.
        help="The type of dask cluster to use. Local mode is recommended for most use cases.",
    )
    return add_option(func)
127+
128+
def failure_mode(func):
    """Attach the ``--failure-mode`` choice option to ``func``.

    Choices come from ``FailureMode.get_values()`` (case-insensitive), the
    default is ``FailureMode.SKIP`` and the default is shown in ``--help``.
    """
    allowed_modes = click.Choice(FailureMode.get_values(), case_sensitive=False)
    add_option = click.option(
        "--failure-mode",
        type=allowed_modes,
        default=FailureMode.SKIP,
        show_default=True,
        help="The failure mode for dask. Can be 'raise' or 'skip'.",
    )
    return add_option(func)
137+
138+
def dask_n_workers(func):
    """Attach the integer ``--dask-n-workers`` option (default ``None``) to ``func``."""
    add_option = click.option(
        "--dask-n-workers",
        type=int,
        default=None,
        help="The number of workers to use with dask.",
    )
    return add_option(func)
146+
147+
def dask_args(func):
    """Apply every dask-related option to ``func`` in one go.

    The decorators are applied innermost-first: ``failure_mode``,
    ``dask_n_workers``, ``dask_type``, then ``use_dask``.
    """
    for decorate in (failure_mode, dask_n_workers, dask_type, use_dask):
        func = decorate(func)
    return func
150+
151+
def target(func):
    """Attach the required, case-sensitive ``--target`` choice option to ``func``.

    ``TargetTags`` is imported inside the function, so the import only runs
    when the decorator is applied.
    """
    from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags

    allowed_targets = click.Choice(TargetTags.get_values(), case_sensitive=True)
    add_option = click.option(
        "--target",
        type=allowed_targets,
        required=True,
        help="The target for the workflow",
    )
    return add_option(func)
160+
161+
def ligands(func):
    """Attach the ``-l`` / ``--ligands`` option to ``func``.

    The value must be an existing file (not a directory); the path is resolved.
    """
    existing_file = click.Path(
        resolve_path=True, exists=True, file_okay=True, dir_okay=False
    )
    add_option = click.option(
        "-l",
        "--ligands",
        type=existing_file,
        help="File containing ligands",
    )
    return add_option(func)
169+
170+
def output_dir(func):
    """Attach the ``--output-dir`` option (default ``"output"``) to ``func``.

    The value is a directory path that is resolved but need not exist yet.
    """
    directory_path = click.Path(
        resolve_path=True, exists=False, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--output-dir",
        type=directory_path,
        default="output",
        help="The directory to output results to.",
    )
    return add_option(func)
180+
181+
def overwrite(func):
    """Attach the ``--overwrite/--no-overwrite`` switch (default on) to ``func``."""
    add_option = click.option(
        "--overwrite/--no-overwrite",
        help="Whether to overwrite the output directory if it exists.",
        default=True,
    )
    return add_option(func)
188+
189+
def input_json(func):
    """Attach the ``--input-json`` option to ``func``.

    The value must be an existing file (not a directory); the path is resolved.
    """
    existing_file = click.Path(
        resolve_path=True, exists=True, file_okay=True, dir_okay=False
    )
    add_option = click.option(
        "--input-json",
        type=existing_file,
        help="Path to a json file containing the inputs to the workflow, WARNING: overrides all other inputs.",
    )
    return add_option(func)
196+
197+ # flag to run all ml scorers
def ml_score(func):
    """Attach the ``--ml-score/--no-ml-score`` switch (default on) to ``func``.

    The option was previously declared as a bare flag with ``is_flag=True``
    and ``default=True``. With no off-switch, ML scoring could not be reliably
    toggled from the command line: depending on the click version, passing
    ``--ml-score`` either changed nothing or inverted the value. Declaring an
    explicit on/off pair keeps ``--ml-score`` and the ``True`` default while
    adding ``--no-ml-score`` to disable scoring, matching how ``overwrite``
    and ``save_to_cache`` are declared in this file.

    Args:
        func: The callable to decorate with the click option.

    Returns:
        The result of applying the ``click.option`` decorator to ``func``.
    """
    add_option = click.option(
        "--ml-score/--no-ml-score",
        default=True,
        help="Whether to run all ml scorers",
    )
    return add_option(func)
205+
206+
def fragalysis_dir(func):
    """Attach the ``--fragalysis-dir`` option to ``func``.

    The value must be an existing directory; the path is resolved.
    """
    existing_directory = click.Path(
        resolve_path=True, exists=True, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--fragalysis-dir",
        type=existing_directory,
        help="Path to a directory containing fragments to dock.",
    )
    return add_option(func)
213+
214+
def structure_dir(func):
    """Attach the ``--structure-dir`` option to ``func``.

    The value must be an existing directory; the path is resolved.
    """
    existing_directory = click.Path(
        resolve_path=True, exists=True, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--structure-dir",
        type=existing_directory,
        help="Path to a directory containing structures.",
    )
    return add_option(func)
221+
222+
def pdb_file(func):
    """Attach the ``--pdb-file`` option to ``func``.

    The value must be an existing file (not a directory); the path is resolved.
    """
    existing_file = click.Path(
        resolve_path=True, exists=True, file_okay=True, dir_okay=False
    )
    add_option = click.option(
        "--pdb-file",
        type=existing_file,
        help="Path to a pdb file containing a structure",
    )
    return add_option(func)
229+
230+
def cache_dir(func):
    """Attach the ``--cache-dir`` option to ``func``.

    The value is a directory path that is resolved but need not exist yet.
    """
    directory_path = click.Path(
        resolve_path=True, exists=False, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--cache-dir",
        type=directory_path,
        help="Path to a directory where design units are cached.",
    )
    return add_option(func)
239+
240+
def use_only_cache(func):
    """Attach the ``--use-only-cache`` boolean flag (off by default) to ``func``."""
    add_option = click.option(
        "--use-only-cache",
        is_flag=True,
        default=False,
        help="Whether to only use the cache.",
    )
    return add_option(func)
248+
249+
def gen_cache_w_default(func):
    """Attach the ``--gen-cache`` option to ``func`` with a default directory.

    The default is ``"prepped_structure_cache"``. Unlike the other path
    options in this file, the path is not resolved (``resolve_path=False``)
    and it need not exist.
    """
    unresolved_directory = click.Path(
        resolve_path=False, exists=False, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--gen-cache",
        type=unresolved_directory,
        default="prepped_structure_cache",
        help="Path to a directory where a design unit cache should be generated.",
    )
    return add_option(func)
259+
260+
def md(func):
    """Attach the ``--md`` boolean flag (off by default) to ``func``."""
    add_option = click.option(
        "--md",
        is_flag=True,
        default=False,
        help="Whether to run MD",
    )
    return add_option(func)
268+
269+
def md_steps(func):
    """Attach the integer ``--md-steps`` option (default 2,500,000) to ``func``."""
    add_option = click.option(
        "--md-steps",
        type=int,
        default=2_500_000,
        help="Number of MD steps",
    )
    return add_option(func)
277+
def core_smarts(func):
    """Attach the ``-cs`` / ``--core-smarts`` string option to ``func``."""
    add_option = click.option(
        "-cs",
        "--core-smarts",
        type=click.STRING,
        help="The SMARTS which should be used to select which atoms to constrain to the reference structure.",
    )
    return add_option(func)
285+
286+
def save_to_cache(func):
    """Attach the ``--save-to-cache/--no-save-to-cache`` switch (default on) to ``func``."""
    add_option = click.option(
        "--save-to-cache/--no-save-to-cache",
        default=True,
        help="If the newly generated structures should be saved to the cache folder.",
    )
    return add_option(func)
293+
294+
def loglevel(func):
    """Attach the ``--loglevel`` choice option to ``func``.

    The default is ``"INFO"`` and it is shown in ``--help``.
    """
    level_names = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    add_option = click.option(
        "--loglevel",
        type=click.Choice(level_names),
        default="INFO",
        show_default=True,
        help="The log level to use.",
    )
    return add_option(func)
303+
304+
def ref_chain(func):
    """Attach the ``--ref-chain`` string option (default ``None``) to ``func``."""
    add_option = click.option(
        "--ref-chain",
        type=str,
        default=None,
        help="Chain ID to align to in reference structure containing the active site.",
    )
    return add_option(func)
312+
313+
def active_site_chain(func):
    """Attach the ``--active-site-chain`` string option (default ``None``) to ``func``."""
    add_option = click.option(
        "--active-site-chain",
        type=str,
        default=None,
        help="Active site chain ID to align to ref_chain in reference structure",
    )
    return add_option(func)
321+
322+ from asapdiscovery .data .util .dask_utils import DaskType , FailureMode
323+
324+ if TYPE_CHECKING :
325+ from asapdiscovery .data .services .postera .manifold_data_validation import TargetTags
326+
327+
@cli.command(
    "prep-protein-for-docking",
    help="Prep protein to make OE Design Units and corresponding schema.",
)
@target
@click.option(
    "--align",
    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
    help="Path to a reference structure to align to",
)
@ref_chain
@active_site_chain
@click.option(
    "--seqres-yaml",
    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
    help="Path to a seqres yaml file to mutate to, if not specified will use the default for the target",
)
@click.option(
    "--loop-db",
    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
    help="Path to a loop database to use for prepping",
)
@click.option(
    "--oe-active-site-residue",
    type=str,
    help="OE formatted string of active site residue to use if not ligand bound",
)
@pdb_file
@fragalysis_dir
@structure_dir
@click.option(
    "--cache-dir",
    help="The path to cached prepared complexes which can be used again.",
    type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
)
@save_to_cache
@dask_args
@output_dir
@input_json
def protein_prep(
    target: "TargetTags",
    align: Optional[str] = None,
    ref_chain: Optional[str] = None,
    active_site_chain: Optional[str] = None,
    seqres_yaml: Optional[str] = None,
    loop_db: Optional[str] = None,
    oe_active_site_residue: Optional[str] = None,
    pdb_file: Optional[str] = None,
    fragalysis_dir: Optional[str] = None,
    structure_dir: Optional[str] = None,
    cache_dir: Optional[str] = None,
    save_to_cache: bool = True,
    use_dask: bool = False,
    dask_type: DaskType = DaskType.LOCAL,
    dask_n_workers: Optional[int] = None,
    failure_mode: FailureMode = FailureMode.SKIP,
    output_dir: str = "output",
    input_json: Optional[str] = None,
):
    """
    Build a ``ProteinPrepInputs`` object and run ``protein_prep_workflow`` on it.

    When ``input_json`` is given, the inputs are loaded from that file with
    ``ProteinPrepInputs.from_json_file`` and none of the other arguments are
    passed to the inputs object. Otherwise every other argument is forwarded
    to ``ProteinPrepInputs`` by keyword.
    """
    # Imported here rather than at module level, so the workflow module is
    # only loaded when this command actually runs.
    from asapdiscovery.workflows.prep_workflows.protein_prep import (
        ProteinPrepInputs,
        protein_prep_workflow,
    )

    if input_json is None:
        # No json file supplied: assemble the inputs from the CLI values.
        cli_values = dict(
            target=target,
            align=align,
            ref_chain=ref_chain,
            active_site_chain=active_site_chain,
            seqres_yaml=seqres_yaml,
            loop_db=loop_db,
            oe_active_site_residue=oe_active_site_residue,
            pdb_file=pdb_file,
            fragalysis_dir=fragalysis_dir,
            structure_dir=structure_dir,
            cache_dir=cache_dir,
            save_to_cache=save_to_cache,
            use_dask=use_dask,
            dask_type=dask_type,
            dask_n_workers=dask_n_workers,
            failure_mode=failure_mode,
            output_dir=output_dir,
        )
        inputs = ProteinPrepInputs(**cli_values)
    else:
        print("Loading inputs from json file... Will override all other inputs.")
        inputs = ProteinPrepInputs.from_json_file(input_json)

    protein_prep_workflow(inputs)
75421
76422# TODO: check for openeye installation, maybe make it a decorator
77423@cli .command (
@@ -236,10 +582,10 @@ def process_bindingdb(input_directory, output_directory):
236582 "has_3d" : mol .to_oemol ().GetDimension () == 3 ,
237583 "num_atoms" : mol .to_oemol ().NumAtoms (),
238584 "smiles" : mol .smiles ,
239- # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space
585+ # "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space
240586 # if mol.tags.get("PDB ID") # removed trailing space
241- "pdb_id" : mol .tags .get ("PDB ID(s) for Ligand-Target Complex" )[:4 ] # removed trailing space
242- if mol .tags .get ("PDB ID(s) for Ligand-Target Complex" )
587+ "pdb_id" : mol .tags .get ("PDB ID(s) for Ligand-Target Complex" )[:4 ] # removed trailing space
588+ if mol .tags .get ("PDB ID(s) for Ligand-Target Complex" )
243589 else "" ,
244590 }
245591
0 commit comments