Skip to content

Commit 2effa92

Browse files
authored
Update findseq.py
1. Fix the error caused by an insertion code in the residue ID (e.g. 5fyj, '138A'). 2. Add an additional example to show the power of regular expressions.
1 parent 67927e9 commit 2effa92

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

findseq.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@
5252
# this ends up finding the sequence, GMSSHGLQWY
5353
findseq GMS.*QWY, 1a3h, sele
5454
55+
# find the potential N-linked glycosylation sites (N, then any residue except P, then S or T) in 5fyj
56+
fetch 5fyj
57+
findseq N(?=[^P][ST]), 5fyj and chain G+B, 5fyj_pngs
58+
5559
NOTES:
5660
Assumes we're using the ONE LETTER amino acid abbreviations.
5761
@@ -342,7 +346,7 @@ def findseq(needle, haystack, selName=None, het=0, firstOnly=0):
342346
aaDict = {'aaList': []}
343347
cmd.iterate("(name ca) and __h", "aaList.append((resi,resn,chain))", space=aaDict)
344348

345-
IDs = [int(x[0]) for x in aaDict['aaList']]
349+
IDs = [x[0] for x in aaDict['aaList']]
346350
AAs = ''.join([one_letter[x[1]] for x in aaDict['aaList']])
347351
chains = [x[2] for x in aaDict['aaList']]
348352

@@ -363,9 +367,9 @@ def findseq(needle, haystack, selName=None, het=0, firstOnly=0):
363367
chain = i_chains[0]
364368
# Only apply chains to selection algebra if there are defined chains.
365369
if chain:
366-
cmd.select(rSelName, rSelName + " or (__h and i. " + str(IDs[start]) + "-" + str(IDs[stop - 1]) + " and c. " + chain + " )")
370+
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + " and c. " + chain + " )")
367371
else:
368-
cmd.select(rSelName, rSelName + " or (__h and i. " + str(IDs[start]) + "-" + str(IDs[stop - 1]) + ")")
372+
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + ")")
369373
if int(firstOnly):
370374
break
371375
cmd.delete("__h")

0 commit comments

Comments
 (0)