Skip to content

Commit 63ac14d

Browse files
committed
Merge branch 'master' of github.com:InformaticsMatters/pipelines
2 parents c5f739b + 6a8bf9d commit 63ac14d

File tree

3 files changed

+126
-104
lines changed

3 files changed

+126
-104
lines changed

.travis.yml

Lines changed: 30 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88
#
99
# If you set PUBLISH_IMAGES you must also set the following: -
1010
#
11-
# DOCKER_USERNAME If PUBLISH_IMAGES is 'yes'
12-
# DOCKER_PASSWORD If PUBLISH_IMAGES is 'yes'
11+
# DOCKER_USERNAME
12+
# DOCKER_PASSWORD
1313
#
1414
# -----------------
1515
#
1616
# NOTE: Pull requests from foreign repositories will not
17-
# result in encrupted variables being set.
17+
# result in encrypted variables being set.
1818
# So, regardless of the state of PUBLISH_IMAGES,
1919
# images will only be published if DOCKER_PASSWORD is defined.
2020

@@ -23,39 +23,26 @@ services:
2323
- docker
2424

2525
stages:
26-
- name: test
2726
- name: publish latest
2827
if: |
2928
branch = master \
30-
AND env(PUBLISH_IMAGES) = yes \
31-
AND env(DOCKER_PASSWORD) IS present
29+
AND env(PUBLISH_IMAGES) = yes
3230
- name: publish tag
3331
if: |
3432
tag IS present \
35-
AND env(PUBLISH_IMAGES) = yes \
36-
AND env(DOCKER_PASSWORD) IS present
33+
AND env(PUBLISH_IMAGES) = yes
3734
- name: publish stable
3835
if: |
3936
tag IS present \
4037
AND tag =~ ^([0-9]+\.){1,2}[0-9]+$ \
41-
AND env(PUBLISH_IMAGES) = yes \
42-
AND env(DOCKER_PASSWORD) IS present
38+
AND env(PUBLISH_IMAGES) = yes
4339
4440
jobs:
4541
include:
4642

47-
- stage: test
48-
name: Test Local Image
49-
script:
50-
- docker build -t informaticsmatters/rdkit_pipelines:latest -f Dockerfile-rdkit .
51-
- docker build -t squonk/rdkit-pipelines-sdposter:latest -f Dockerfile-sdposter .
52-
- git clone https://github.com/InformaticsMatters/pipelines-utils.git
53-
- cd pipelines-utils/src/groovy
54-
- groovy PipelineTester.groovy -indocker
55-
5643
# Publish-stage jobs...
57-
# Every successful master build results in a latest image
58-
# and every tag results in a tagged image in Docker Hub.
44+
# Every successful master build results in a build (and test)
45+
# of the latest image and every tag results in a tagged image in Docker Hub.
5946
# Tags that match a RegEx are considered 'official' tags
6047
# and also result in a 'stable' image tag.
6148

@@ -65,29 +52,38 @@ jobs:
6552
# Build and push the pipelines-rdkit image and its sd-poster
6653
- docker build -t informaticsmatters/rdkit_pipelines:latest -f Dockerfile-rdkit .
6754
- docker build -t squonk/rdkit-pipelines-sdposter:latest -f Dockerfile-sdposter .
68-
- docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"
69-
- docker push informaticsmatters/rdkit_pipelines:latest
70-
- docker push squonk/rdkit-pipelines-sdposter:latest
55+
- git clone https://github.com/InformaticsMatters/pipelines-utils.git
56+
- cd pipelines-utils/src/groovy
57+
- groovy PipelineTester.groovy -indocker
58+
- if [ -n "$DOCKER_PASSWORD" ]; then
59+
docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD";
60+
docker push informaticsmatters/rdkit_pipelines:latest;
61+
docker push squonk/rdkit-pipelines-sdposter:latest;
62+
fi
7163

7264
- stage: publish tag
7365
name: Publish Tagged Image
7466
script:
7567
# Build and push the pipelines-rdkit image and its sd-poster
7668
- docker build -t informaticsmatters/rdkit_pipelines:${TRAVIS_TAG} -f Dockerfile-rdkit .
7769
- docker build -t squonk/rdkit-pipelines-sdposter:${TRAVIS_TAG} -f Dockerfile-sdposter .
78-
- docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"
79-
- docker push informaticsmatters/rdkit_pipelines:${TRAVIS_TAG}
80-
- docker push squonk/rdkit-pipelines-sdposter:${TRAVIS_TAG}
70+
- if [ -n "$DOCKER_PASSWORD" ]; then
71+
docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD";
72+
docker push informaticsmatters/rdkit_pipelines:${TRAVIS_TAG};
73+
docker push squonk/rdkit-pipelines-sdposter:${TRAVIS_TAG};
74+
fi
8175

8276
- stage: publish stable
8377
name: Publish Stable Image
8478
script:
8579
# Pull the corresponding pipelines-rdkit image tag
8680
# and push it again as 'stable'
87-
- docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD"
88-
- docker pull informaticsmatters/rdkit_pipelines:${TRAVIS_TAG}
89-
- docker pull squonk/rdkit-pipelines-sdposter:${TRAVIS_TAG}
90-
- docker tag informaticsmatters/rdkit_pipelines:${TRAVIS_TAG} informaticsmatters/rdkit_pipelines:stable
91-
- docker tag squonk/rdkit-pipelines-sdposter:${TRAVIS_TAG} squonk/rdkit-pipelines-sdposter:stable
92-
- docker push informaticsmatters/rdkit_pipelines:stable
93-
- docker push squonk/rdkit-pipelines-sdposter:stable
81+
- if [ -n "$DOCKER_PASSWORD" ]; then
82+
docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD";
83+
docker pull informaticsmatters/rdkit_pipelines:${TRAVIS_TAG};
84+
docker pull squonk/rdkit-pipelines-sdposter:${TRAVIS_TAG};
85+
docker tag informaticsmatters/rdkit_pipelines:${TRAVIS_TAG} informaticsmatters/rdkit_pipelines:stable;
86+
docker tag squonk/rdkit-pipelines-sdposter:${TRAVIS_TAG} squonk/rdkit-pipelines-sdposter:stable;
87+
docker push informaticsmatters/rdkit_pipelines:stable;
88+
docker push squonk/rdkit-pipelines-sdposter:stable;
89+
fi

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
[![Build Status](https://travis-ci.com/InformaticsMatters/pipelines.svg?branch=master)](https://travis-ci.com/InformaticsMatters/pipelines)
44
![GitHub release (latest SemVer including pre-releases)](https://img.shields.io/github/v/release/informaticsmatters/pipelines?include_prereleases)
55

6-
The project experiments with ways to generate data processing piplelines.
6+
The project experiments with ways to generate data processing pipelines.
77
The aim is to generate some re-usable building blocks that can be piped
88
together into more functional pipelines. Their prime initial use is as executors
99
for the Squonk Computational Notebook (http://squonk.it) though it is expected
@@ -22,7 +22,7 @@ be coming soon, including some from the Java ecosystem.
2222
* See [here](src/nextflow/README.md) for more info on running these in Nextflow.
2323

2424
Note: this is experimental, subject to change, and there are no guarantees that things work as expected!
25-
That said, its already proved to be highly useful in the Squonk Computational Notebook, and if you are interested let us know, and join the fun.
25+
That said, it's already proved to be highly useful in the Squonk Computational Notebook, and if you are interested let us know, and join the fun.
2626

2727
The code is licensed under the Apache 2.0 license.
2828

src/python/pipelines/xchem/xcos.py

Lines changed: 94 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -101,85 +101,111 @@ def getFeatureMapScore(small_m, large_m, score_mode=FeatMaps.FeatMapScoreMode.Al
101101

102102

103103
# This is the main XCOS function
104-
def getReverseScores(mols, frags, COS_threshold, writer):
104+
def getReverseScores(mols, frags, score_threshold, writer):
105105

106106
for mol in mols:
107-
107+
108108
# Get the bits
109109
compound_bits = getBits(mol)
110110

111111
all_scores = []
112112

113113
for bit in compound_bits:
114+
115+
# Let's remove wildcard atoms
116+
# Removing wildcard atoms does not impact feat score but does lower shape overlay
117+
# For scoring should multiply feat score by number of non-wildcard atoms and use
118+
# all atoms including wildcard for shape overlay
119+
bit_without_wildcard_atoms = Chem.DeleteSubstructs(bit, Chem.MolFromSmarts('[#0]'))
120+
121+
# Let's only score bits that have more than one atom (do not count wildcard atoms)
122+
# Get number of bit atoms without wildcard atoms
123+
no_bit_atoms_without_wild_card = bit_without_wildcard_atoms.GetNumAtoms()
114124

115125
# Get number of bit atoms
116126
no_bit_atoms = bit.GetNumAtoms()
117127

118-
scores = []
119-
120-
for frag_mol in frags:
121-
122-
# NB reverse SuCOS scoring
123-
fm_score = getFeatureMapScore(bit, frag_mol)
124-
fm_score = np.clip(fm_score, 0, 1)
125-
# Change van der Waals radius scale for stricter overlay
126-
protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol, allowReordering=False, vdwScale=0.2)
127-
protrude_dist = np.clip(protrude_dist, 0, 1)
128-
129-
# Get frag name for linking to score
130-
frag_name = frag_mol.GetProp('_Name').strip('Mpro-')
131-
132-
# Check if MCS yield > 0 atoms
133-
mcs_match = rdFMCS.FindMCS([bit,frag_mol],ringMatchesRingOnly=True,matchValences=True)
134-
135-
# Get number of atoms in MCS match found
136-
no_mcs_atoms = Chem.MolFromSmarts(mcs_match.smartsString).GetNumAtoms()
137-
138-
if no_mcs_atoms == 0:
139-
140-
scores.append((frag_name, 0, no_bit_atoms))
141-
142-
if no_mcs_atoms > 0:
143-
144-
# NB reverse SuCOS scoring
145-
fm_score = getFeatureMapScore(bit, frag_mol)
146-
fm_score = np.clip(fm_score, 0, 1)
147-
148-
# Change van der Waals radius scale for stricter overlay
149-
protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol,
150-
allowReordering=False,
151-
vdwScale=0.2)
152-
protrude_dist = np.clip(protrude_dist, 0, 1)
153-
154-
reverse_SuCOS_score = 0.5 * fm_score + 0.5 * (1 - protrude_dist)
155-
156-
scores.append((frag_name, reverse_SuCOS_score, no_bit_atoms))
157-
158-
all_scores.append(scores)
159-
160-
list_dfs = []
161-
162-
for score in all_scores:
163-
164-
df = pd.DataFrame(data=score, columns=['Fragment', 'Score', 'No_bit_atoms'])
128+
# Only score if enough info in bit to describe a vector - this will bias against
129+
# cases where frag has long aliphatic chain
130+
131+
if no_bit_atoms_without_wild_card > 1:
165132

166-
# Get maximum scoring fragment for bit match
167-
df = df[df['Score'] == df['Score'].max()]
168-
list_dfs.append(df)
169-
170-
final_df = pd.concat(list_dfs)
171-
172-
# Score 1: the score is scaled by the number of bit atoms
173-
score_1 = (final_df.No_bit_atoms * final_df.Score).sum()
174-
175-
# Let's only get frags above a threshold
176-
final_df = final_df[final_df.Score > COS_threshold]
177-
178-
# Let#s sort the df by increasing score
179-
final_df = final_df.sort_values(by=['Score'], ascending=False)
180-
181-
# Get the unique fragments above threshold
182-
all_frags = pd.unique(final_df.Fragment)
133+
scores = []
134+
135+
for frag_mol in frags:
136+
137+
# Get frag name for linking to score
138+
frag_name = frag_mol.GetProp('_Name').strip('Mpro-')
139+
140+
# Score only if some common structure shared between bit and fragment.
141+
# Check if MCS yield > 0 atoms
142+
mcs_match = rdFMCS.FindMCS([bit,frag_mol], ringMatchesRingOnly=True, matchValences=True)
143+
144+
# Get mcs_mol from mcs_match
145+
mcs_mol = Chem.MolFromSmarts(mcs_match.smartsString)
146+
147+
# check if frag has MCS mol
148+
mcs_test = frag_mol.HasSubstructMatch(mcs_mol)
149+
150+
if mcs_test:
151+
152+
# Change van der Waals radius scale for stricter overlay
153+
protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol, allowReordering=False, vdwScale=0.2)
154+
protrude_dist = np.clip(protrude_dist, 0, 1)
155+
156+
protrude_score = 1 - protrude_dist
157+
158+
# We are comparing small bits relative to large frags
159+
# If overlay poor then assign score of 0
160+
# NB reverse SuCOS scoring. Feat map is also computationally
161+
# more expensive
162+
163+
if protrude_score > score_threshold:
164+
165+
fm_score = getFeatureMapScore(bit, frag_mol)
166+
fm_score = np.clip(fm_score, 0, 1)
167+
168+
# What about good shape overlay but poor feat match?
169+
# Let's add a cutoff here to prevent good overlays with
170+
# poor feat match - eg. 3 mem ring 2 x C atoms overlay well
171+
# with 2 x aromatic ring Cs
172+
173+
if fm_score > score_threshold:
174+
# Use modified SuCOS score where feat_score scaled by number of bit atoms
175+
# without wildcard atoms and the shape overlay score by the number of bit atoms
176+
# including wildcard atoms
177+
scores.append((frag_name, protrude_score,no_bit_atoms,fm_score,no_bit_atoms_without_wild_card))
178+
else:
179+
scores.append((frag_name,0,no_bit_atoms,0,no_bit_atoms_without_wild_card ))
180+
else:
181+
scores.append((frag_name,0,no_bit_atoms,0,no_bit_atoms_without_wild_card ))
182+
else:
183+
scores.append((frag_name,0,no_bit_atoms,0,no_bit_atoms_without_wild_card ))
184+
185+
all_scores.append(scores)
186+
187+
list_dfs = []
188+
189+
for score in all_scores:
190+
191+
df = pd.DataFrame(data=score, columns = ['Fragment','Shape_score','no_bit_atoms','Feat_score','no_bit_atoms_without_wild_card'])
192+
193+
# Get maximum scoring fragment for bit match
194+
df['Modified_SuCOS_score'] = 0.5 * (df.Feat_score * df.no_bit_atoms_without_wild_card) + 0.5 * (df.Shape_score * df.no_bit_atoms)
195+
df = df[df['Modified_SuCOS_score'] == df['Modified_SuCOS_score'].max()]
196+
list_dfs.append(df)
197+
198+
final_df = pd.concat(list_dfs)
199+
200+
# Score 1: the score is scaled by the number of bit atoms
201+
score_1 = final_df.Modified_SuCOS_score.sum()
202+
203+
# Let's only get frags with a score > 0
204+
#final_df['SuCOS_score'] = 0.5 * final_df.Feat_score + 0.5 * final_df.Shape_score
205+
final_df = final_df[final_df.Modified_SuCOS_score > 0]
206+
207+
# Get the unique fragments above threshold
208+
all_frags = pd.unique(final_df.Fragment)
183209

184210
# Add props we want
185211
mol.SetProp(field_XCosRefMols, ','.join(all_frags))
@@ -204,8 +230,8 @@ def process(molecules, fragments, writer):
204230
else:
205231
utils.log('Using', len(frag_mol_list), 'fragments. No errors')
206232

207-
#mols, frags, COS_threshold, writer
208-
getReverseScores(molecules, frag_mol_list, 0.40, writer)
233+
#mols, frags, score_threshold, writer
234+
getReverseScores(molecules, frag_mol_list, 0.5, writer)
209235

210236

211237
def main():

0 commit comments

Comments
 (0)