Skip to content

Commit 7ef7ca9

Browse files
committed
add prep-protein-for-docking step to avoid using the asapdiscovery cli which has ML import problems
1 parent cbc3866 commit 7ef7ca9

File tree

2 files changed

+351
-5
lines changed

2 files changed

+351
-5
lines changed

modules.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ process PREP_FOR_DOCKING {
7878

7979
script:
8080
"""
81-
asap-cli protein-prep \
81+
plumbline prep-protein-for-docking \
8282
--target SARS-CoV-2-Mpro \
8383
--pdb-file "${prepped_pdb}" \
8484
--output-dir "./"
@@ -166,4 +166,4 @@ process VISUALIZE_NETWORK {
166166
--network-graphml "${network_graph}" \
167167
--output-directory "./"
168168
"""
169-
}
169+
}

pkg/src/plumbdb/cli.py

Lines changed: 349 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,352 @@ def assess_prepped_protein(output_directory, input_openeye_du):
7272
with open(output_directory / f"{stem}_quality_report.json", "w") as f:
7373
json.dump(report, f, indent=4)
7474

75+
# Prep for docking
76+
from typing import TYPE_CHECKING, Optional
77+
# copying everything
78+
# TODO: delete every option helper below that the prep-protein-for-docking command does not actually use
79+
80+
def postera(func):
    """Add the ``--postera`` boolean flag (off by default) to a click command.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        Whatever the ``click.option`` decorator returns for ``func``.
    """
    add_option = click.option(
        "--postera",
        is_flag=True,
        default=False,
        help="Whether to download complexes from Postera.",
    )
    return add_option(func)
87+
88+
89+
def postera_molset_name(func):
    """Add the ``--postera-molset-name`` string option (default ``None``).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--postera-molset-name",
        type=str,
        default=None,
        help="The name of the Postera molecule set to use.",
    )
    return add_option(func)
96+
97+
98+
def postera_upload(func):
    """Add the ``--postera-upload`` boolean flag (off by default).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--postera-upload",
        is_flag=True,
        default=False,
        help="Whether to upload results to Postera.",
    )
    return add_option(func)
105+
106+
107+
def postera_args(func):
    """Attach all three Postera options to ``func`` in one step.

    The decorators are applied innermost-first: ``postera_upload``, then
    ``postera_molset_name``, then ``postera``.
    """
    for decorator in (postera_upload, postera_molset_name, postera):
        func = decorator(func)
    return func
109+
110+
111+
def use_dask(func):
    """Add the ``--use-dask`` boolean flag (off by default).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--use-dask",
        is_flag=True,
        default=False,
        help="Whether to use dask for parallelism.",
    )
    return add_option(func)
118+
119+
120+
def dask_type(func):
    """Add the ``--dask-type`` choice option to a click command.

    The allowed values come from ``DaskType.get_values()`` and are matched
    case-insensitively; the default is ``DaskType.LOCAL``.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    # DaskType is looked up when this helper is called, not when it is defined.
    return click.option(
        "--dask-type",
        type=click.Choice(DaskType.get_values(), case_sensitive=False),
        default=DaskType.LOCAL,
        # Spelling fixed in the user-facing help text ("reccommended").
        help="The type of dask cluster to use. Local mode is recommended for most use cases.",
    )(func)
127+
128+
129+
def failure_mode(func):
    """Add the ``--failure-mode`` choice option (default ``FailureMode.SKIP``).

    Allowed values come from ``FailureMode.get_values()`` and are matched
    case-insensitively. The default is shown in the help output.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    allowed_modes = click.Choice(FailureMode.get_values(), case_sensitive=False)
    add_option = click.option(
        "--failure-mode",
        type=allowed_modes,
        default=FailureMode.SKIP,
        help="The failure mode for dask. Can be 'raise' or 'skip'.",
        show_default=True,
    )
    return add_option(func)
137+
138+
139+
def dask_n_workers(func):
    """Add the ``--dask-n-workers`` integer option (default ``None``).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--dask-n-workers",
        type=int,
        default=None,
        help="The number of workers to use with dask.",
    )
    return add_option(func)
146+
147+
148+
def dask_args(func):
    """Attach all four dask-related options to ``func`` in one step.

    The decorators are applied innermost-first: ``failure_mode``,
    ``dask_n_workers``, ``dask_type``, then ``use_dask``.
    """
    for decorator in (failure_mode, dask_n_workers, dask_type, use_dask):
        func = decorator(func)
    return func
150+
151+
152+
def target(func):
    """Add the required, case-sensitive ``--target`` choice option.

    The allowed values come from ``TargetTags.get_values()``. ``TargetTags``
    is imported inside this function, so the asapdiscovery module is only
    loaded when the decorator is applied.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags

    allowed_targets = click.Choice(TargetTags.get_values(), case_sensitive=True)
    add_option = click.option(
        "--target",
        type=allowed_targets,
        help="The target for the workflow",
        required=True,
    )
    return add_option(func)
160+
161+
162+
def ligands(func):
    """Add the ``-l`` / ``--ligands`` option, which takes an existing file path.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    existing_file = click.Path(
        resolve_path=True, exists=True, file_okay=True, dir_okay=False
    )
    add_option = click.option(
        "-l",
        "--ligands",
        type=existing_file,
        help="File containing ligands",
    )
    return add_option(func)
169+
170+
171+
def output_dir(func):
    """Add the ``--output-dir`` directory option (default ``"output"``).

    The directory does not have to exist yet; the path is resolved.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    directory_path = click.Path(
        resolve_path=True, exists=False, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--output-dir",
        type=directory_path,
        help="The directory to output results to.",
        default="output",
    )
    return add_option(func)
180+
181+
182+
def overwrite(func):
    """Add the ``--overwrite/--no-overwrite`` switch (on by default).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--overwrite/--no-overwrite",
        default=True,
        help="Whether to overwrite the output directory if it exists.",
    )
    return add_option(func)
188+
189+
190+
def input_json(func):
    """Add the ``--input-json`` option, which takes an existing file path.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    existing_file = click.Path(
        resolve_path=True, exists=True, file_okay=True, dir_okay=False
    )
    add_option = click.option(
        "--input-json",
        type=existing_file,
        help="Path to a json file containing the inputs to the workflow, WARNING: overrides all other inputs.",
    )
    return add_option(func)
196+
197+
# flag to run all ml scorers
198+
def ml_score(func):
    """Add the ``--ml-score/--no-ml-score`` switch (on by default).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    # The original declared a plain flag with is_flag=True and default=True,
    # so the value was True whether or not the flag was passed and could not
    # be turned off from the command line. A paired on/off switch keeps
    # "--ml-score" working and adds "--no-ml-score" to disable it.
    return click.option(
        "--ml-score/--no-ml-score",
        default=True,
        help="Whether to run all ml scorers",
    )(func)
205+
206+
207+
def fragalysis_dir(func):
    """Add the ``--fragalysis-dir`` option, which takes an existing directory.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    existing_directory = click.Path(
        resolve_path=True, exists=True, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--fragalysis-dir",
        type=existing_directory,
        help="Path to a directory containing fragments to dock.",
    )
    return add_option(func)
213+
214+
215+
def structure_dir(func):
    """Add the ``--structure-dir`` option, which takes an existing directory.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    existing_directory = click.Path(
        resolve_path=True, exists=True, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--structure-dir",
        type=existing_directory,
        help="Path to a directory containing structures.",
    )
    return add_option(func)
221+
222+
223+
def pdb_file(func):
    """Add the ``--pdb-file`` option, which takes an existing file path.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    existing_file = click.Path(
        resolve_path=True, exists=True, file_okay=True, dir_okay=False
    )
    add_option = click.option(
        "--pdb-file",
        type=existing_file,
        help="Path to a pdb file containing a structure",
    )
    return add_option(func)
229+
230+
231+
def cache_dir(func):
    """Add the ``--cache-dir`` directory option; the directory need not exist.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    directory_path = click.Path(
        resolve_path=True, exists=False, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--cache-dir",
        type=directory_path,
        help="Path to a directory where design units are cached.",
    )
    return add_option(func)
239+
240+
241+
def use_only_cache(func):
    """Add the ``--use-only-cache`` boolean flag (off by default).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--use-only-cache",
        is_flag=True,
        default=False,
        help="Whether to only use the cache.",
    )
    return add_option(func)
248+
249+
250+
def gen_cache_w_default(func):
    """Add the ``--gen-cache`` directory option with a default value.

    The default is ``"prepped_structure_cache"``. Unlike the other path
    options in this file, the path is not resolved (``resolve_path=False``).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    unresolved_directory = click.Path(
        resolve_path=False, exists=False, file_okay=False, dir_okay=True
    )
    add_option = click.option(
        "--gen-cache",
        type=unresolved_directory,
        help="Path to a directory where a design unit cache should be generated.",
        default="prepped_structure_cache",
    )
    return add_option(func)
259+
260+
261+
def md(func):
    """Add the ``--md`` boolean flag (off by default).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--md",
        is_flag=True,
        default=False,
        help="Whether to run MD",
    )
    return add_option(func)
268+
269+
270+
def md_steps(func):
    """Add the ``--md-steps`` integer option (default 2500000).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--md-steps",
        type=int,
        default=2500000,
        help="Number of MD steps",
    )
    return add_option(func)
277+
278+
def core_smarts(func):
    """Add the ``-cs`` / ``--core-smarts`` string option.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "-cs",
        "--core-smarts",
        type=click.STRING,
        help="The SMARTS which should be used to select which atoms to constrain to the reference structure.",
    )
    return add_option(func)
285+
286+
287+
def save_to_cache(func):
    """Add the ``--save-to-cache/--no-save-to-cache`` switch (on by default).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--save-to-cache/--no-save-to-cache",
        help="If the newly generated structures should be saved to the cache folder.",
        default=True,
    )
    return add_option(func)
293+
294+
295+
def loglevel(func):
    """Add the ``--loglevel`` choice option (default ``"INFO"``).

    The default is shown in the help output.

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    level_names = click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
    add_option = click.option(
        "--loglevel",
        type=level_names,
        help="The log level to use.",
        default="INFO",
        show_default=True,
    )
    return add_option(func)
303+
304+
305+
def ref_chain(func):
    """Add the ``--ref-chain`` string option (default ``None``).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--ref-chain",
        type=str,
        default=None,
        help="Chain ID to align to in reference structure containing the active site.",
    )
    return add_option(func)
312+
313+
314+
def active_site_chain(func):
    """Add the ``--active-site-chain`` string option (default ``None``).

    Args:
        func: The callable (or click command) to decorate.

    Returns:
        ``func`` with the option attached by ``click.option``.
    """
    add_option = click.option(
        "--active-site-chain",
        type=str,
        default=None,
        help="Active site chain ID to align to ref_chain in reference structure",
    )
    return add_option(func)
321+
322+
from asapdiscovery.data.util.dask_utils import DaskType, FailureMode
323+
324+
if TYPE_CHECKING:
325+
from asapdiscovery.data.services.postera.manifold_data_validation import TargetTags
326+
327+
328+
@cli.command(
    "prep-protein-for-docking",
    help="Prep protein to make OE Design Units and corresponding schema.",
)
@target
@click.option(
    "--align",
    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
    help="Path to a reference structure to align to",
)
@ref_chain
@active_site_chain
@click.option(
    "--seqres-yaml",
    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
    help="Path to a seqres yaml file to mutate to, if not specified will use the default for the target",
)
@click.option(
    "--loop-db",
    type=click.Path(resolve_path=True, exists=True, file_okay=True, dir_okay=False),
    help="Path to a loop database to use for prepping",
)
@click.option(
    "--oe-active-site-residue",
    type=str,
    help="OE formatted string of active site residue to use if not ligand bound",
)
@pdb_file
@fragalysis_dir
@structure_dir
@click.option(
    "--cache-dir",
    help="The path to cached prepared complexes which can be used again.",
    type=click.Path(resolve_path=True, exists=True, file_okay=False, dir_okay=True),
)
@save_to_cache
@dask_args
@output_dir
@input_json
def protein_prep(
    target: "TargetTags",
    align: Optional[str] = None,
    ref_chain: Optional[str] = None,
    active_site_chain: Optional[str] = None,
    seqres_yaml: Optional[str] = None,
    loop_db: Optional[str] = None,
    oe_active_site_residue: Optional[str] = None,
    pdb_file: Optional[str] = None,
    fragalysis_dir: Optional[str] = None,
    structure_dir: Optional[str] = None,
    cache_dir: Optional[str] = None,
    save_to_cache: bool = True,
    use_dask: bool = False,
    dask_type: DaskType = DaskType.LOCAL,
    dask_n_workers: Optional[int] = None,
    failure_mode: FailureMode = FailureMode.SKIP,
    output_dir: str = "output",
    input_json: Optional[str] = None,
):
    """
    Build ``ProteinPrepInputs`` and hand them to ``protein_prep_workflow``.

    When ``input_json`` is given, the inputs are loaded from that file with
    ``ProteinPrepInputs.from_json_file`` and every other argument is ignored.
    Otherwise the inputs are constructed from the remaining arguments, which
    are passed through under their own names.
    """
    # Imported here rather than at module import time.
    from asapdiscovery.workflows.prep_workflows.protein_prep import (
        ProteinPrepInputs,
        protein_prep_workflow,
    )

    if input_json is None:
        # No JSON file: forward the command-line values one-to-one.
        cli_values = dict(
            target=target,
            align=align,
            ref_chain=ref_chain,
            active_site_chain=active_site_chain,
            seqres_yaml=seqres_yaml,
            loop_db=loop_db,
            oe_active_site_residue=oe_active_site_residue,
            pdb_file=pdb_file,
            fragalysis_dir=fragalysis_dir,
            structure_dir=structure_dir,
            cache_dir=cache_dir,
            save_to_cache=save_to_cache,
            use_dask=use_dask,
            dask_type=dask_type,
            dask_n_workers=dask_n_workers,
            failure_mode=failure_mode,
            output_dir=output_dir,
        )
        inputs = ProteinPrepInputs(**cli_values)
    else:
        print("Loading inputs from json file... Will override all other inputs.")
        inputs = ProteinPrepInputs.from_json_file(input_json)

    protein_prep_workflow(inputs)
75421

76422
# TODO: check for openeye installation, maybe make it a decorator
77423
@cli.command(
@@ -236,10 +582,10 @@ def process_bindingdb(input_directory, output_directory):
236582
"has_3d": mol.to_oemol().GetDimension() == 3,
237583
"num_atoms": mol.to_oemol().NumAtoms(),
238584
"smiles": mol.smiles,
239-
# "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space
585+
# "pdb_id": mol.tags.get("PDB ID")[:4] # removed trailing space
240586
# if mol.tags.get("PDB ID") # removed trailing space
241-
"pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4] # removed trailing space
242-
if mol.tags.get("PDB ID(s) for Ligand-Target Complex")
587+
"pdb_id": mol.tags.get("PDB ID(s) for Ligand-Target Complex")[:4] # removed trailing space
588+
if mol.tags.get("PDB ID(s) for Ligand-Target Complex")
243589
else "",
244590
}
245591

0 commit comments

Comments
 (0)