Skip to content

Commit f85ac42

Browse files
committed
refactor: all scripts
1 parent 966cedf commit f85ac42

File tree

13 files changed

+206
-157
lines changed

13 files changed

+206
-157
lines changed

workflow/scripts/ResultTxt.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
import os
66
import pandas as pd
77

8-
infile = snakemake.input[0]
8+
infile = snakemake.input[0]
99
outfile = snakemake.output[0]
1010

1111
linesep = os.linesep
1212

13+
1314
def pairwise(iterable):
1415
"""
1516
:param iterable:
@@ -19,38 +20,40 @@ def pairwise(iterable):
1920
next(b, None)
2021
return zip(a, b)
2122

23+
2224
def extract_pdbqt(infile):
    """
    Collect one enthalpy per ligand from a docking result file.

    :param infile: path to the result file to parse
    :return: tuple of (receptor name without suffix, ligand IDs in order of
        first appearance, enthalpy strings matching those IDs)
    """
    seen = set()
    ligand_ids = []
    scores = []
    with open(infile, encoding="utf-8") as handle:
        # the first line reads: 'REMARK RECEPTOR path/to/target.pdbqt'
        header = handle.readline()
        # keep only the target name, WITHOUT directory and suffix
        last_field = header.split(" ")[-1]
        receptor = Path(last_field).stem
        # The remaining lines are inspected as overlapping (previous, current)
        # pairs, because the file is structured like:
        #
        # REMARK VINA RESULT: Enthalpy 1 Enthalpy 2 Enthalpy 3
        # REMARK Name = <name>
        previous = None
        for current in handle:
            if previous is not None and "Name" in current:
                name = current.split(" = ")[-1].strip(os.linesep)
                # only the first occurrence of a ligand is recorded
                if name not in seen:
                    seen.add(name)
                    ligand_ids.append(name)
                    # 4th whitespace-separated field of the preceding line
                    scores.append(previous.split()[3])
            previous = current

    return receptor, ligand_ids, scores
5052

51-
if __name__ == '__main__':
53+
54+
if __name__ == "__main__":
5255
receptor, target_IDs, enthalpies = extract_pdbqt(infile)
5356
# putting into data frame:
54-
out_df = pd.DataFrame(enthalpies, index = target_IDs, columns = [receptor])
57+
out_df = pd.DataFrame(enthalpies, index=target_IDs, columns=[receptor])
5558
# finally, save the output
5659
out_df.to_csv(outfile)

workflow/scripts/ZINCdownload.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""download ligands in pdbqt format from ZINC database"""
2+
23
import os
34
import requests
45

workflow/scripts/concatPDBQT.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""read all files in input folder and concat them together"""
2+
23
import os
34
from snakemake.shell import shell
45

workflow/scripts/downloadZINCsubsets.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
'''download ligand subsets in mol2 format from ZINC database'''
1+
"""download ligand subsets in mol2 format from ZINC database"""
22

33
import requests
44

55
out = snakemake.output[0]
66
subset = snakemake.params.sub
77

8-
url = ''.join(["https://zinc15.docking.org/substances/subsets/", subset, ".mol2?count=all"])
8+
url = "".join(
9+
["https://zinc15.docking.org/substances/subsets/", subset, ".mol2?count=all"]
10+
)
911

1012
r = requests.get(url, timeout=60)
1113
with open(out, "wb") as outfile:

workflow/scripts/generateIRODS.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import pandas as pd
55

6-
target1 = snakemake.config["TARGETS"]
6+
target1 = snakemake.config["TARGETS"]
77
targets = snakemake.config["RESCREENING_TARGETS"]
88

99
name = snakemake.config["EXPERIMENT_NAME"]
@@ -37,5 +37,5 @@
3737

3838
out_dict.update(results)
3939

40-
with open(outfile, "w", encoding = "utf-8") as f:
41-
json.dump(out_dict, f, sort_keys = False, indent = 4)
40+
with open(outfile, "w", encoding="utf-8") as f:
41+
json.dump(out_dict, f, sort_keys=False, indent=4)

workflow/scripts/makeHistogram.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import pandas as pd
66
from matplotlib import pyplot as plt
77

8+
89
def pairwise(iterable):
    """Return an iterator of overlapping pairs: s -> (s0,s1), (s1,s2), (s2,s3), ..."""
    leading, trailing = itertools.tee(iterable, 2)
    # advance the second copy by one item so zip pairs each element with its
    # successor; the None default keeps an empty iterable from raising
    next(trailing, None)
    return zip(leading, trailing)
1314

15+
1416
def getHistogram(in_pdbqt, low, high, step):
1517
"""creates histogram of strutures in_pdbqt.
1618
@@ -25,15 +27,15 @@ def getHistogram(in_pdbqt, low, high, step):
2527
step : float
2628
size of binding energy buckets.
2729
"""
28-
f = gzip.open(in_pdbqt, 'r')
30+
f = gzip.open(in_pdbqt, "r")
2931
lowerBound = low
3032
bins = []
31-
list1 =[]
33+
list1 = []
3234
while lowerBound < high:
3335
bins.append(lowerBound)
3436
lowerBound = lowerBound + step
3537
bins = [round(num, 2) for num in bins]
36-
for a,b in pairwise(f):
38+
for a, b in pairwise(f):
3739
if "REMARK VINA RESULT:" in str(b):
3840
if "MODEL 1" in str(a):
3941
tmp = b.split()
@@ -46,6 +48,7 @@ def getHistogram(in_pdbqt, low, high, step):
4648
plt.ylabel("frequency")
4749
fig = plt.gcf()
4850
fig.set_size_inches(10, 5)
49-
plt.savefig(snakemake.output[0], dpi = 300)
51+
plt.savefig(snakemake.output[0], dpi=300)
52+
5053

5154
getHistogram(snakemake.input[0], -13, -3, 0.2)

workflow/scripts/mergeOutput.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
"""merge all docking output files together"""
2+
23
import shutil
34
import subprocess
45

56
outFile = snakemake.output[0]
inFiles = snakemake.input

# TODO: change to ZIPFILE to support better result portability
with open(outFile, "wb") as merged:
    for gz_path in inFiles:
        # `gunzip -t` tests whether the gzip file is valid before merging;
        # with check=True a non-zero exit status raises CalledProcessError
        try:
            subprocess.run(
                ["gunzip", "-t", gz_path], stdout=subprocess.PIPE, check=True
            )
        except subprocess.CalledProcessError:
            print(f"Erros occured during the docking of {gz_path}")
            continue

        # TODO: avoid overwriting the file descriptor!
        # the input passed the test: append its raw bytes to the merged file
        with open(gz_path, "rb") as part:
            shutil.copyfileobj(part, merged)

workflow/scripts/prepareDocking.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
preparation of ligand input file for VinaLC containing the path
33
to every ligand with same weight + logP
44
"""
5+
56
import os
67

78
input_directory = snakemake.input.in_dir
@@ -15,7 +16,13 @@
1516
for purchase in snakemake.config["ZINC_INPUT"]["PURCHASE"]:
1617
for ph in snakemake.config["ZINC_INPUT"]["PH"]:
1718
for charge in snakemake.config["ZINC_INPUT"]["CHARGE"]:
18-
fname = weightlog + react + purchase + ph + charge + ".pdbqt"
19+
fname = (
20+
weightlog + react + purchase + ph + charge + ".pdbqt"
21+
)
1922
ligand_file = os.path.join(input_directory, fname)
20-
with open(os.path.join(snakemake.output.library ),"r", encoding='utf-8') as file_object:
23+
with open(
24+
os.path.join(snakemake.output.library),
25+
"r",
26+
encoding="utf-8",
27+
) as file_object:
2128
file_object.write(ligand_file + "\n")

workflow/scripts/prepareReceptor.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
from Bio.PDB import PDBParser, PDBIO
55

6+
67
def removeChains(model, chainlist):
78
"""
89
removes chains not specified in chainlist from model
@@ -20,7 +21,7 @@ def removeChains(model, chainlist):
2021
chain_to_remove = []
2122
for chain in model:
2223
for residue in chain:
23-
if residue.id[0] != ' ':
24+
if residue.id[0] != " ":
2425
residue_to_remove.append((chain.id, residue.id))
2526
if not chain:
2627
chain_to_remove.append(chain.id)
@@ -39,9 +40,9 @@ def prepareRec(inputfile, outputfile, target):
3940
select chains to delete depending on config definition
4041
"""
4142
print(target)
42-
ID = target.split(',')
43+
ID = target.split(",")
4344
chains = ID[1].split(" ")
44-
parser = PDBParser()#MMCIFParser()
45+
parser = PDBParser() # MMCIFParser()
4546
structure = parser.get_structure(ID[0], inputfile)
4647
model = structure[0]
4748
removeChains(model, chains)
@@ -52,12 +53,13 @@ def prepareRec(inputfile, outputfile, target):
5253
print("printing outfile")
5354
io.save(out)
5455

55-
head,tail = os.path.split(snakemake.input[0])
56+
57+
head, tail = os.path.split(snakemake.input[0])
5658
filename = tail.split(".")[0]
5759

5860
if any(filename in target for target in snakemake.config["TARGETS"]):
59-
print('filename in targets')
60-
prepareRec(snakemake.input[0], snakemake.output[0], snakemake.config["TARGETS"][0])
61+
print("filename in targets")
62+
prepareRec(snakemake.input[0], snakemake.output[0], snakemake.config["TARGETS"][0])
6163

6264
elif filename in str(snakemake.config["RESCREENING_TARGETS"]):
6365
for target in snakemake.config["RESCREENING_TARGETS"]:

workflow/scripts/sortResult.py

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,64 @@
11
"""sorting and outputting the N>1 best results from a docking run"""
2+
23
import subprocess
34
import gzip
45
import os
56
from math import ceil
7+
68
inPath = snakemake.input[0]
outFile = snakemake.output[0]
# RESULT_NUMBER > 1 is an absolute number of results to keep; a value <= 1 is
# treated as a fraction of all ligands found in the input.
num = float(snakemake.config["RESULT_NUMBER"])


def _iter_records(lines):
    """Group lines into per-ligand records; each starts at a 'REMARK RECEPTOR' line."""
    record = None
    for line in lines:
        if "REMARK RECEPTOR" in line:
            if record is not None:
                yield record
            record = []
        # lines ahead of the first 'REMARK RECEPTOR' belong to no ligand
        if record is not None:
            record.append(line)
    # The final record has no following 'REMARK RECEPTOR' line to trigger the
    # flush above, so emit it here; otherwise the last ligand is silently lost.
    if record is not None:
        yield record


def _keep_best(records, limit):
    """Return up to `limit` records with the lowest scores, best score first."""
    scores = []
    kept = []
    for record in records:
        # The score is the 4th whitespace-separated field on the 4th line of a
        # record (presumably the 'REMARK VINA RESULT:' line - confirm against
        # the docking output format).
        score = float(record[3].split()[3])
        if len(kept) + 1 <= limit:
            scores.append(score)
            kept.append(record)
            continue
        if not scores:
            # limit < 1: nothing may be kept
            continue
        worst = max(scores)
        if score < worst:
            # replace the currently worst entry; score and structure are
            # updated together so both lists always stay aligned
            slot = scores.index(worst)
            scores[slot] = score
            kept[slot] = record
    # stable sort: equal scores keep their slot order
    order = sorted(range(len(scores)), key=scores.__getitem__)
    return [kept[i] for i in order]


if num > 1:
    listSize = num
else:
    # Count the ligands only when a fraction has to be resolved. The argument
    # list (no shell) keeps paths with spaces or shell metacharacters intact.
    ligand_num = int(
        subprocess.check_output(["zgrep", "-c", "REMARK RECEPTOR", inPath])
    )
    listSize = ceil(num * ligand_num)

with gzip.open(inPath, "rt", encoding="utf-8") as inFile:
    bestRecords = _keep_best(_iter_records(inFile), listSize)

with open(outFile, "w", encoding="utf-8") as outF:
    for record in bestRecords:
        outF.writelines(record)
os.chmod(outFile, 0o400)  ## TODO: remove after included in Snakemake

0 commit comments

Comments
 (0)