Skip to content

Commit 69f987f

Browse files
committed
fixed chuk size for prepare_tether.py
1 parent 1a4c1da commit 69f987f

File tree

1 file changed

+92
-9
lines changed

1 file changed

+92
-9
lines changed

src/python/pipelines/xchem/prepare_tether.py

Lines changed: 92 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ def get_writer(outfile_base):
5959
global file_count
6060

6161
if not writer:
62-
writer = Chem.SDWriter(outfile_base + '_' + f'{file_count:04}' + '.sdf')
62+
writer = Chem.SDWriter(outfile_base + '_' + f'{file_count:03}' + '.sdf')
6363

64-
if write_count < chunk_size:
64+
if write_count <= chunk_size:
6565
# print(' Using existing writer for', id, writer)
6666
return writer
6767

@@ -73,7 +73,7 @@ def get_writer(outfile_base):
7373
writer.close()
7474
file_count += 1
7575

76-
writer = Chem.SDWriter(outfile_base + '_' + f'{file_count:04}' + '.sdf')
76+
writer = Chem.SDWriter(outfile_base + '_' + f'{file_count:03}' + '.sdf')
7777
#print(' Using new writer for', id, writer)
7878
return writer
7979

@@ -361,11 +361,89 @@ def minimize_mol(mol, target, molMatch, targetMatch, algMap, getForceField):
361361
rms = AlignMol(mol, target, atomMap=algMap)
362362
mol.SetDoubleProp('EmbedRMS', rms)
363363

364+
def find_best_mcs(hit, mol):
365+
best_score = 0
366+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=True, ringMatchesRingOnly=True,
367+
atomCompare=rdFMCS.AtomCompare.CompareElements, bondCompare=rdFMCS.BondCompare.CompareOrder)
368+
score = score_mcs(hit, mol, mcs)
369+
if score > best_score:
370+
best_score = score
371+
best_mcs = mcs
372+
best_index = 1
373+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=True, ringMatchesRingOnly=False,
374+
atomCompare=rdFMCS.AtomCompare.CompareElements, bondCompare=rdFMCS.BondCompare.CompareOrder)
375+
score = score_mcs(hit, mol, mcs)
376+
if score > best_score:
377+
best_score = score
378+
best_mcs = mcs
379+
best_index = 2
380+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=False, ringMatchesRingOnly=True,
381+
atomCompare=rdFMCS.AtomCompare.CompareElements, bondCompare=rdFMCS.BondCompare.CompareOrder)
382+
score = score_mcs(hit, mol, mcs)
383+
if score > best_score:
384+
best_score = score
385+
best_mcs = mcs
386+
best_index = 3
387+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=False, ringMatchesRingOnly=False,
388+
atomCompare=rdFMCS.AtomCompare.CompareElements, bondCompare=rdFMCS.BondCompare.CompareOrder)
389+
score = score_mcs(hit, mol, mcs)
390+
if score > best_score:
391+
best_score = score
392+
best_mcs = mcs
393+
best_index = 4
394+
395+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=True, ringMatchesRingOnly=True,
396+
atomCompare=rdFMCS.AtomCompare.CompareAny, bondCompare=rdFMCS.BondCompare.CompareAny)
397+
score = score_mcs(hit, mol, mcs)
398+
if score > best_score:
399+
best_score = score
400+
best_mcs = mcs
401+
best_index = 5
402+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=True, ringMatchesRingOnly=False,
403+
atomCompare=rdFMCS.AtomCompare.CompareAny, bondCompare=rdFMCS.BondCompare.CompareAny)
404+
score = score_mcs(hit, mol, mcs)
405+
if score > best_score:
406+
best_score = score
407+
best_mcs = mcs
408+
best_index = 6
409+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=False, ringMatchesRingOnly=True,
410+
atomCompare=rdFMCS.AtomCompare.CompareAny, bondCompare=rdFMCS.BondCompare.CompareAny)
411+
score = score_mcs(hit, mol, mcs)
412+
if score > best_score:
413+
best_score = score
414+
best_mcs = mcs
415+
best_index = 7
416+
mcs = rdFMCS.FindMCS([hit, mol], completeRingsOnly=False, ringMatchesRingOnly=False,
417+
atomCompare=rdFMCS.AtomCompare.CompareAny, bondCompare=rdFMCS.BondCompare.CompareAny)
418+
score = score_mcs(hit, mol, mcs)
419+
if score > best_score:
420+
best_score = score
421+
best_mcs = mcs
422+
best_index = 8
423+
print(" Best MCS is", best_index, 'with score', best_score)
424+
return best_mcs
425+
426+
def score_mcs(hit, mol, mcs):
427+
smartsMol = Chem.MolFromSmarts(mcs.smartsString)
428+
hitMatches = hit.GetSubstructMatch(smartsMol)
429+
molMatches = mol.GetSubstructMatch(smartsMol)
430+
# print(' Matches:', hitMatches, molMatches)
431+
score = 0
432+
for hitMatch, molMatch in zip(hitMatches, molMatches):
433+
score += 1
434+
if hit.GetAtomWithIdx(hitMatch).GetAtomicNum() == mol.GetAtomWithIdx(molMatch).GetAtomicNum():
435+
score += 1
436+
if mol.GetAtomWithIdx(molMatch).IsInRing():
437+
score += 1
438+
print(' Score:', score)
439+
return score
364440

365441
def execute(smi, hit_molfile, outfile_base, min_ph=None, max_ph=None, max_inputs=0, max_outputs=0, modulus=0, timout_embed_secs=5):
366442

367443
global write_count
368444

445+
GetFF=lambda x,confId=-1:AllChem.MMFFGetMoleculeForceField(x,AllChem.MMFFGetMoleculeProperties(x),confId=confId)
446+
369447
hit = Chem.MolFromMolFile(hit_molfile)
370448

371449
num_mols = 0
@@ -395,6 +473,8 @@ def execute(smi, hit_molfile, outfile_base, min_ph=None, max_ph=None, max_inputs
395473
num_processed += 1
396474
num_added = 0
397475

476+
w = get_writer(outfile_base)
477+
398478
print('Processing', num_mols, num_processed, smiles, Chem.MolToSmiles(hit))
399479

400480
try:
@@ -406,14 +486,19 @@ def execute(smi, hit_molfile, outfile_base, min_ph=None, max_ph=None, max_inputs
406486
else:
407487
enumerated_mols = [mol]
408488

409-
# mcs0 = rdFMCS.FindMCS([hit, mol], completeRingsOnly=True, matchValences=False, ringMatchesRingOnly=True,
410-
# atomCompare=rdFMCS.AtomCompare.CompareAny, bondCompare=rdFMCS.BondCompare.CompareAny)
411-
mcs0 = rdFMCS.FindMCS([hit, mol], completeRingsOnly=True, ringMatchesRingOnly=True)
489+
mcs0 = rdFMCS.FindMCS([hit, mol], completeRingsOnly=False, ringMatchesRingOnly=False,
490+
atomCompare=rdFMCS.AtomCompare.CompareAny, bondCompare=rdFMCS.BondCompare.CompareAny)
491+
# mcs0 = rdFMCS.FindMCS([hit, mol], completeRingsOnly=False, ringMatchesRingOnly=False)
492+
# score_mcs(hit, mol, mcs0)
493+
494+
# mcs0 = find_best_mcs(hit, mol)
495+
412496
mcsQuery = Chem.MolFromSmarts(mcs0.smartsString)
413497

414498
count = 0
415499
for ligand in enumerated_mols:
416500
molh = Chem.AddHs(ligand)
501+
# mol_match_tuple = MultiConstrainedEmbed(molh, queryMol, getForceField=GetFF)
417502
mol_match_tuple = multi_constrained_embed(molh, hit, mcsQuery, timout_embed_secs=timout_embed_secs)
418503
print(' ', len(mol_match_tuple), 'mols tethered')
419504

@@ -429,10 +514,8 @@ def execute(smi, hit_molfile, outfile_base, min_ph=None, max_ph=None, max_inputs
429514
t_mol.SetProp('TETHERED ATOMS', tethers)
430515
# print(' Tethers: ', tethers)
431516

432-
w = get_writer(outfile_base)
433517
w.write(t_mol)
434518
write_count += 1
435-
print(' Write count =', write_count)
436519
num_outputs += 1
437520
num_added += 1
438521

@@ -455,7 +538,7 @@ def execute(smi, hit_molfile, outfile_base, min_ph=None, max_ph=None, max_inputs
455538
def main():
456539
"""
457540
Example usage:
458-
python -m pipelines.xchem.prepare_tether --smi ../../data/mpro/Mpro-x0387_0.smi --mol ../../data/mpro/Mpro-x0387_0.mol -o TETHERED --max-inputs 500 --chunk-size 100
541+
python -m pipelines.xchem.prepare_tether_2 --smi ../../data/mpro/Mpro-x0387_0.smi --mol ../../data/mpro/Mpro-x0387_0.mol -o TETHERED --max-inputs 500 --chunk-size 100
459542
460543
:return:
461544
"""

0 commit comments

Comments
 (0)