Skip to content

Commit 663122c

Browse files
committed
update & format scripts
1 parent 1b7d968 commit 663122c

File tree

7 files changed

+321
-118
lines changed

7 files changed

+321
-118
lines changed

scripts/generate_assets.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import glob
2+
import json
3+
import os
4+
5+
from openbabel import openbabel
6+
from tqdm import tqdm
7+
8+
# workdir = "./src/assets/blocks/chem-437"
9+
workdir = "./src/assets/blocks/opv"
10+
11+
12+
def generate_assets(key, smiles):
    """Write the SMILES, SVG and MOL2 asset files for one molecule.

    The SMILES string is parsed with Open Babel, hydrogens are added and
    ``OBBuilder().Build`` is run on the molecule before it is serialized to
    MOL2 and SVG.  The three files are written to ``<workdir>/smi/<key>.smi``,
    ``<workdir>/svg/<key>.svg`` and ``<workdir>/mol2/<key>.mol2``; the
    sub-directories must already exist.

    Args:
        key: File-name stem shared by the three output files.
        smiles: SMILES string of the molecule.

    Raises:
        ValueError: If Open Babel reports that ``smiles`` could not be read.
    """
    ob_conv = openbabel.OBConversion()
    ob_conv.SetInFormat("smi")

    mol = openbabel.OBMol()
    # ReadString signals failure through its return value; without this check
    # an unparsable SMILES would silently produce assets for an empty molecule.
    if not ob_conv.ReadString(mol, smiles):
        raise ValueError(f"could not parse SMILES for {key!r}: {smiles!r}")
    mol.AddHydrogens()
    openbabel.OBBuilder().Build(mol)

    ob_conv.SetOutFormat("mol2")
    mol2 = ob_conv.WriteString(mol)

    ob_conv.SetOutFormat("svg")
    svg = ob_conv.WriteString(mol)

    # Each sub-directory name doubles as the file extension.
    for subdir, content in (("smi", smiles), ("svg", svg), ("mol2", mol2)):
        with open(os.path.join(workdir, subdir, f"{key}.{subdir}"), "w") as f:
            f.write(content)
35+
36+
37+
def main():
    """Regenerate the smi/svg/mol2 assets for every complete table entry.

    Loads ``<workdir>/data.json``, makes sure the three output directories
    exist and removes whatever files they currently contain, then calls
    ``generate_assets`` for each entry of ``block_set["table"]`` that has a
    ``"smiles"`` value and whose key contains no ``0`` component.
    """
    data_path = os.path.join(workdir, "data.json")
    with open(data_path) as f:
        block_set = json.load(f)

    # Start from empty output directories so stale assets never linger.
    for subdir in ["smi", "svg", "mol2"]:
        target_dir = os.path.join(workdir, subdir)
        os.makedirs(target_dir, exist_ok=True)
        for stale_path in glob.glob(os.path.join(target_dir, "*")):
            os.remove(stale_path)

    for entry in tqdm(block_set["table"].values()):
        # A "0" component in the key means that slot is unfilled; skip those.
        has_empty_slot = "0" in entry["key"].split(":")
        if has_empty_slot or "smiles" not in entry:
            continue
        generate_assets(entry["key"].replace(":", "_"), entry["smiles"])
50+
51+
52+
if __name__ == "__main__":
53+
main()

scripts/process_block_svg.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import numpy as np
77

8+
# workdir = "./src/assets/blocks/opv"
89
workdir = "./src/assets/blocks/chem-437"
910

1011

@@ -48,7 +49,7 @@ def get_arc_bbox(
4849
(rx**2 * ry**2 - rx**2 * y1_**2 - ry**2 * x1_**2)
4950
/ (
5051
rx**2 * y1_**2 + ry**2 * x1_**2
51-
), # this could become negative, probably due to rounding error
52+
), # this could become negative, probably due to rounding error
5253
)
5354
)
5455
if large_arc_flag == sweep_flag:

scripts/process_color_wheel.py

Lines changed: 44 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,35 @@
55
from rdkit.Chem import CanonSmiles, MolFromSmiles
66
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
77

8-
from utils import get_svg_dimensions, naive_combine
8+
from utils import get_svg_dimensions, naive_combine, resolve_functional_property_ranges
99

10-
workdir = './src/assets/blocks/10x10x10palette'
10+
workdir = "./src/assets/blocks/10x10x10palette"
1111

12-
with open(os.path.join(workdir, 'block_set.json')) as file:
12+
with open(os.path.join(workdir, "block_set.json")) as file:
1313
block_set = json.load(file)
1414

1515
blocks_by_index = [[None], [None], [None]]
1616

1717

1818
def process_blocks():
1919
processed_blocks = [[], [], []]
20-
for block in block_set['blocks']:
21-
blocks_by_index[block['index']].append(block)
22-
23-
_, _, width, height = get_svg_dimensions(block['svgUrl'])
24-
processed_blocks[int(block['index'])].append({
25-
'index': block['index'],
26-
'id': block['id'],
27-
'svgUrl': block['svgUrl'],
28-
'width': width,
29-
'height': height
30-
})
20+
for block in block_set["blocks"]:
21+
blocks_by_index[block["index"]].append(block)
22+
23+
_, _, width, height = get_svg_dimensions(block["svgUrl"])
24+
processed_blocks[int(block["index"])].append(
25+
{
26+
"index": block["index"],
27+
"id": block["id"],
28+
"svgUrl": block["svgUrl"],
29+
"width": width,
30+
"height": height,
31+
}
32+
)
3133

3234
for blocks in processed_blocks:
33-
blocks.sort(key=lambda b: b['id'])
34-
block_set['blocks'] = processed_blocks
35+
blocks.sort(key=lambda b: b["id"])
36+
block_set["blocks"] = processed_blocks
3537

3638

3739
def get_smiles(donor, bridge, acceptor):
@@ -42,59 +44,54 @@ def get_smiles(donor, bridge, acceptor):
4244
blocks = [block for block in blocks if block]
4345

4446
if len(blocks) == 1:
45-
return CanonSmiles(blocks[0]['properties']['smiles'])
47+
return CanonSmiles(blocks[0]["properties"]["smiles"])
4648

4749
if not donor or not bridge or not acceptor:
48-
return ''
50+
return ""
4951

50-
start = chr(ord('A') + (donor['id'] - 1))
51-
mid = bridge['id']
52-
end = chr(ord('K') + (acceptor['id'] - 1))
52+
start = chr(ord("A") + (donor["id"] - 1))
53+
mid = bridge["id"]
54+
end = chr(ord("K") + (acceptor["id"] - 1))
5355

54-
filename = workdir + f'/smi/{start}_{mid}_{end}.smi'
56+
filename = workdir + f"/smi/{start}_{mid}_{end}.smi"
5557
with open(filename) as f:
5658
smiles = f.read().strip()
5759
return smiles
5860

5961

6062
def generate_lookup_table():
61-
block_set['table'] = {}
63+
block_set["table"] = {}
6264

6365
for donor, bridge, acceptor in itertools.product(*blocks_by_index):
64-
d_id = donor['id'] if donor else 0
65-
b_id = bridge['id'] if bridge else 0
66-
a_id = acceptor['id'] if acceptor else 0
67-
key = f'{d_id}:{b_id}:{a_id}'
66+
d_id = donor["id"] if donor else 0
67+
b_id = bridge["id"] if bridge else 0
68+
a_id = acceptor["id"] if acceptor else 0
69+
key = f"{d_id}:{b_id}:{a_id}"
6870

6971
smiles = get_smiles(donor, bridge, acceptor)
7072
chemical_formula = CalcMolFormula(MolFromSmiles(smiles))
7173

72-
all_smiles = [block['properties']['smiles'] if block else '' for block in (donor, bridge, acceptor)]
73-
74-
block_set['table'][key] = {
75-
'key': key,
76-
'chemicalFormula': chemical_formula.replace('+', '').replace('-', ''),
77-
'smiles': smiles,
78-
'lambdaMaxShift': (
79-
(donor['properties']['lambdaMaxShift'] if donor else 0)
80-
+ (bridge['properties']['lambdaMaxShift'] if bridge else 0)
81-
+ (acceptor['properties']['lambdaMaxShift'] if acceptor else 0)
74+
all_smiles = [
75+
block["properties"]["smiles"] if block else ""
76+
for block in (donor, bridge, acceptor)
77+
]
78+
79+
block_set["table"][key] = {
80+
"key": key,
81+
"chemicalFormula": chemical_formula.replace("+", "").replace("-", ""),
82+
"smiles": smiles,
83+
"lambdaMaxShift": (
84+
(donor["properties"]["lambdaMaxShift"] if donor else 0)
85+
+ (bridge["properties"]["lambdaMaxShift"] if bridge else 0)
86+
+ (acceptor["properties"]["lambdaMaxShift"] if acceptor else 0)
8287
),
83-
'molecularWeight': MolWt(naive_combine(all_smiles))
88+
"molecularWeight": MolWt(naive_combine(all_smiles)),
8489
}
8590

8691

87-
def resolve_functional_property_ranges():
88-
for prop in block_set['functionalProperties']:
89-
all_values = [entry[prop['key']] for entry in block_set['table'].values()
90-
if '0' not in entry['key'].split(':')]
91-
prop['min'] = min(all_values)
92-
prop['max'] = max(all_values)
93-
94-
9592
process_blocks()
9693
generate_lookup_table()
9794
resolve_functional_property_ranges()
9895

99-
with open(os.path.join(workdir, 'data.json'), 'w') as file:
96+
with open(os.path.join(workdir, "data.json"), "w") as file:
10097
json.dump(block_set, file, indent=2)
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import json
2+
import os
3+
4+
import pandas as pd
5+
6+
from utils import get_svg_dimensions, resolve_functional_property_ranges
7+
8+
# Directory that receives the generated data.json.
workdir = "./src/assets/blocks/chem-437"

# NOTE(review): absolute path on one developer's machine; the script will not
# run elsewhere until this is pointed at a local copy of the spreadsheet.
input_filepath = "/Users/wenqihe2/Documents/projects/dmm/data/chem-437/blocks.xlsx"


def _property(key, label, display_strategy="default"):
    """Return a new property descriptor dict.

    A fresh dict is built on every call so that descriptors appearing in
    several places of ``block_set`` never share state.
    """
    return {"key": key, "label": label, "displayStrategy": display_strategy}


# Skeleton of the block set; "blocks" and "table" are filled in by
# process_blocks() / process_products() before it is dumped to JSON.
block_set = {
    "id": "Chem_437",
    "moleculeSize": 3,
    "labelProperty": _property(
        "chemicalFormula", "Chemical Formula", "chemicalFormula"
    ),
    "primaryProperty": _property("TPSA", "Total Polar Surface Area"),
    "functionalProperties": [
        _property("TPSA", "Total Polar Surface Area"),
        _property("cLogP", "Calculated Partition Coefficient"),
    ],
    "firstTierProperties": [
        _property("chemicalFormula", "Chemical Formula", "chemicalFormula"),
        _property("TPSA", "Total Polar Surface Area"),
    ],
    "secondTierProperties": [
        _property("smiles", "SMILES"),
        _property("molecularWeight", "Molecular Weight"),
    ],
    "blocks": [[], [], []],
    "table": {},
}
60+
61+
62+
def add_lookup_entry(key, *, smiles, chemical_formula, molecular_weight, tpsa, c_log_p):
    """Store one molecule record in ``block_set["table"]`` under ``key``.

    An existing record with the same key is overwritten.

    Args:
        key: Lookup key of the record; also stored inside the record.
        smiles: Value stored as ``"smiles"``.
        chemical_formula: Value stored as ``"chemicalFormula"``.
        molecular_weight: Value stored as ``"molecularWeight"``.
        tpsa: Value stored as ``"TPSA"``.
        c_log_p: Value stored as ``"cLogP"``.
    """
    field_names = (
        "key",
        "smiles",
        "chemicalFormula",
        "molecularWeight",
        "TPSA",
        "cLogP",
    )
    field_values = (key, smiles, chemical_formula, molecular_weight, tpsa, c_log_p)
    block_set["table"][key] = dict(zip(field_names, field_values))
71+
72+
73+
def process_blocks(df):
    """Register every individual block listed in ``df``.

    For each row, the ``"Position"`` value is split into a one-letter prefix
    and a numeric id: prefix ``"S"`` maps to block index 0, ``"M"`` to 1 and
    anything else to 2.  The block (with the dimensions of its SVG) is
    appended to ``block_set["blocks"][index]`` and a lookup entry for the
    block on its own is added, keyed with ``0`` in the two other slots.

    Args:
        df: Table with the columns ``"Position"``,
            ``"SMILES w/ H replacing RG"``, ``"Formula"``, ``"MW"``,
            ``"TPSA"`` and ``"cLogP"``.
    """
    prefix_to_index = {"S": 0, "M": 1}

    for _, row in df.iterrows():
        pos: str = row["Position"]
        # Any prefix other than "S" / "M" falls into the last slot.
        block_index = prefix_to_index.get(pos[0], 2)
        block_id = int(pos[1:])

        svg_path = f"assets/blocks/Chem_437/block_svg/{pos}.svg"
        _, _, width, height = get_svg_dimensions(svg_path)
        block_record = {
            "index": block_index,
            "id": block_id,
            "svgUrl": svg_path,
            "width": width,
            "height": height,
        }
        block_set["blocks"][block_index].append(block_record)

        # Key has this block's id in its own slot and 0 in the other two.
        key = ":".join(
            str(block_id) if slot == block_index else "0" for slot in range(3)
        )

        add_lookup_entry(
            key,
            smiles=row["SMILES w/ H replacing RG"],
            chemical_formula=row["Formula"],
            molecular_weight=row["MW"],
            tpsa=row["TPSA"],
            c_log_p=row["cLogP"],
        )
104+
105+
106+
def process_products(df):
    """Add a lookup entry for every product row in ``df``.

    The key is ``"<start>:<middle>:<end>"``, where each component is the
    integer that follows the first character of the ``"Start"``, ``"Middle"``
    and ``"End"`` cells respectively.

    Args:
        df: Table with the columns ``"Start"``, ``"Middle"``, ``"End"``,
            ``"SMILES"``, ``"Formula"``, ``"MW"``, ``"TPSA"`` and ``"cLogP"``.
    """
    slot_columns = ("Start", "Middle", "End")

    for _, row in df.iterrows():
        # Drop the one-letter prefix of each cell and keep the numeric id.
        slot_ids = [int(row[column][1:]) for column in slot_columns]
        key = ":".join(str(slot_id) for slot_id in slot_ids)

        add_lookup_entry(
            key,
            smiles=row["SMILES"],
            chemical_formula=row["Formula"],
            molecular_weight=row["MW"],
            tpsa=row["TPSA"],
            c_log_p=row["cLogP"],
        )
122+
123+
124+
def main():
    """Build the block set from the spreadsheet and write it to data.json.

    Reads every sheet of ``input_filepath``, feeds the ``"Blocks"`` and
    ``"Products"`` sheets to their processors, resolves the functional
    property ranges and dumps ``block_set`` to ``<workdir>/data.json``.
    """
    # sheet_name=None makes read_excel return a mapping of sheet name -> table.
    sheets = pd.read_excel(input_filepath, sheet_name=None)

    process_blocks(sheets["Blocks"])
    process_products(sheets["Products"])
    # NOTE(review): called without arguments although it is imported from
    # utils; confirm that it can actually reach this module's block_set.
    resolve_functional_property_ranges()

    output_path = os.path.join(workdir, "data.json")
    with open(output_path, "w") as f:
        json.dump(block_set, f, indent=2)
133+
134+
135+
if __name__ == "__main__":
136+
main()

0 commit comments

Comments
 (0)