Skip to content

Commit 488e427

Browse files
committed
Improvements to tethering. RMSD filter.
1 parent b3bc837 commit 488e427

File tree

5 files changed

+351
-81
lines changed

5 files changed

+351
-81
lines changed

.dockerignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,6 @@ gradle
44
openshift
55
work
66
.nextflow
7+
trace.txt*
8+
report.html*
9+
wip

src/nextflow/xchem/prepare-tether-featurestein.nf

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env nextflow
22

3-
params.candidates = "data/mpro/expanded-17.json"
3+
params.smiles = '*.smi'
4+
params.molfiles = '*.mol'
45
params.fragments = "data/mpro/hits-17.sdf.gz"
56
params.chunk_tether = 250
67
params.chunk_score = 10000
@@ -9,7 +10,9 @@ params.digits = 4
910
params.generate_filenames = false
1011
params.num_conformers = 10
1112

12-
candidates = file(params.candidates)
13+
// files
14+
smilesfiles = file(params.smiles)
15+
molfiles = file(params.molfiles)
1316
fragments = file(params.fragments)
1417

1518
process generate_feat_maps {
@@ -27,19 +30,27 @@ process generate_feat_maps {
2730
"""
2831
}
2932

30-
process split_json {
33+
process splitter {
3134

3235
container 'informaticsmatters/rdkit_pipelines:latest'
3336

3437
input:
35-
file candidates
38+
file smiles from smilesfiles.flatten()
39+
file mol from molfiles.flatten()
3640

3741
output:
38-
file '*.smi' into smiles
3942
file '*.mol' into mols
43+
file '*.smi' into smiles
4044

4145
"""
42-
python -m pipelines.xchem.split_fragnet_candidates -i '$candidates' ${params.generate_filenames ? '--generate-filenames' : ''}
46+
stem=${smiles.name[0..-5]}
47+
split -l $params.chunk_tether -d -a 3 --additional-suffix .smi $smiles \${stem}_
48+
mv $smiles ${smiles}.orig
49+
for f in *.smi
50+
do
51+
cp $mol \${f:0:-4}.mol
52+
done
53+
mv $mol ${mol}.orig
4354
"""
4455
}
4556

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,63 @@
11
#!/usr/bin/env nextflow
22

3-
params.candidates = "data/mpro/expanded-17.json"
4-
params.chunk = 1000
3+
params.smiles = '*.smi'
4+
params.molfiles = '*.mol'
5+
params.chunk_tether = 250
6+
params.chunk_score = 10000
57
params.limit = 0
6-
params.digits = 4
7-
params.generate_filenames = false
8+
params.num_conformers = 10
9+
params.atom_compare = 'CompareElements'
10+
params.bond_compare = 'CompareOrder'
11+
params.complete_rings_only = true
12+
params.ring_matches_ring_only = true
13+
params.minimize = 4
814

9-
candidates = file(params.candidates)
1015

11-
process split_json {
16+
smilesfiles = file(params.smiles)
17+
molfiles = file(params.molfiles)
18+
19+
process splitter {
1220

1321
container 'informaticsmatters/rdkit_pipelines:latest'
1422

1523
input:
16-
file candidates
24+
file smiles from smilesfiles.flatten()
25+
file mol from molfiles.flatten()
1726

1827
output:
19-
file '*.smi' into smiles
2028
file '*.mol' into mols
29+
file '*.smi' into smiles
2130

2231
"""
23-
python -m pipelines.xchem.split_fragnet_candidates -i '$candidates' ${params.generate_filenames ? '--generate-filenames' : ''}
32+
stem=${smiles.name[0..-5]}
33+
split -l $params.chunk_tether -d -a 3 --additional-suffix .smi $smiles \${stem}_
34+
mv $smiles ${smiles}.orig
35+
for f in *.smi
36+
do
37+
cp $mol \${f:0:-4}.mol
38+
done
39+
mv $mol ${mol}.orig
2440
"""
2541
}
2642

2743
process tether {
2844

2945
container 'informaticsmatters/rdkit_pipelines:latest'
46+
publishDir '.'
3047

3148
input:
32-
file smiles from smiles.flatten() //collect().toSortedList().flatten()
33-
file mol from mols.flatten() //collect().toSortedList().flatten()
49+
file smiles from smiles.flatten()
50+
file mol from mols.flatten()
3451

3552
output:
36-
file 'Tethered_*.sdf'
53+
file 'Tethered_*.sdf' into tethered_parts
54+
3755
"""
38-
python -m pipelines.xchem.prepare_tether --smi '$smiles' --mol '$mol' -o 'Tethered_${smiles.name[0..-5]}'
56+
python -m pipelines.xchem.prepare_tether --smi '$smiles' --mol '$mol' --chunk-size $params.chunk_score\
57+
--num-conformers $params.num_conformers -o 'Tethered_${smiles.name[0..-5]}'\
58+
--atom-compare $params.atom_compare --bond-compare $params.bond_compare\
59+
${params.complete_rings_only ? '--complete-rings-only' : ''}\
60+
${params.ring_matches_ring_only ? '--ring-matches-ring-only' : ''}\
61+
--minimize $params.minimize
3962
"""
4063
}

0 commit comments

Comments
 (0)