moleculemaker
diff --git a/scripts/generate_assets.py b/scripts/generate_assets.py
Lines changed: 53 additions & 0 deletions
diff --git a/scripts/process_block_svg.py b/scripts/process_block_svg.py
Lines changed: 2 additions & 1 deletion
diff --git a/scripts/process_color_wheel.py b/scripts/process_color_wheel.py
Lines changed: 44 additions & 47 deletions
diff --git a/scripts/process_kinase_inhibitors.py b/scripts/process_kinase_inhibitors.py
Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,53 @@
+import glob
+import json
+import os
+
+from openbabel import openbabel
+from tqdm import tqdm
+
+# workdir = "./src/assets/blocks/chem-437"
+workdir = "./src/assets/blocks/opv"
+
+
+def generate_assets(key, smiles):
+    """Write the SMILES, SVG and MOL2 asset files for one lookup-table entry.
+
+    The SMILES string is parsed with Open Babel, hydrogens are added and
+    ``OBBuilder.Build`` is run before the molecule is serialised. Output goes
+    to ``<workdir>/smi/<key>.smi``, ``<workdir>/svg/<key>.svg`` and
+    ``<workdir>/mol2/<key>.mol2``; the sub-directories must already exist.
+
+    Args:
+        key: File stem shared by the three output files.
+        smiles: SMILES string describing the molecule.
+
+    Raises:
+        ValueError: If Open Babel cannot parse ``smiles``.
+    """
+    ob_conv = openbabel.OBConversion()
+    ob_conv.SetInFormat("smi")
+
+    mol = openbabel.OBMol()
+    # ReadString reports failure through its return value; without this check
+    # an unparsable SMILES would silently produce assets for an empty molecule.
+    if not ob_conv.ReadString(mol, smiles):
+        raise ValueError(f"Could not parse SMILES for {key!r}: {smiles!r}")
+    mol.AddHydrogens()
+    openbabel.OBBuilder().Build(mol)
+
+    ob_conv.SetOutFormat("mol2")
+    mol2 = ob_conv.WriteString(mol)
+
+    ob_conv.SetOutFormat("svg")
+    svg = ob_conv.WriteString(mol)
+
+    with open(os.path.join(workdir, "smi", f"{key}.smi"), "w") as f:
+        f.write(smiles)
+
+    with open(os.path.join(workdir, "svg", f"{key}.svg"), "w") as f:
+        f.write(svg)
+
+    with open(os.path.join(workdir, "mol2", f"{key}.mol2"), "w") as f:
+        f.write(mol2)
+
+
+def main():
+    """Regenerate the smi/svg/mol2 assets for every complete entry in data.json."""
+    data_path = os.path.join(workdir, "data.json")
+    with open(data_path) as f:
+        block_set = json.load(f)
+
+    # Empty each output directory first so assets from earlier runs are removed.
+    for subdir in ("smi", "svg", "mol2"):
+        out_dir = os.path.join(workdir, subdir)
+        os.makedirs(out_dir, exist_ok=True)
+        for stale in glob.glob(os.path.join(out_dir, "*")):
+            os.remove(stale)
+
+    for entry in tqdm(block_set["table"].values()):
+        entry_key = entry["key"]
+        # Skip keys with a "0" component, and entries that carry no SMILES.
+        if "0" in entry_key.split(":") or "smiles" not in entry:
+            continue
+        generate_assets(entry_key.replace(":", "_"), entry["smiles"])
+
+
+if __name__ == "__main__":
+    main()
@@ -5,6 +5,7 @@
 
 import numpy as np
 
+# workdir = "./src/assets/blocks/opv"
 workdir = "./src/assets/blocks/chem-437"
 
 
@@ -48,7 +49,7 @@ def get_arc_bbox(
             (rx**2 * ry**2 - rx**2 * y1_**2 - ry**2 * x1_**2)
             / (
                 rx**2 * y1_**2 + ry**2 * x1_**2
-            ), # this could become negative, probably due to rounding error
+            ),  # this could become negative, probably due to rounding error
         )
     )
     if large_arc_flag == sweep_flag:
 
@@ -5,33 +5,35 @@
 from rdkit.Chem import CanonSmiles, MolFromSmiles
 from rdkit.Chem.rdMolDescriptors import CalcMolFormula
 
-from utils import get_svg_dimensions, naive_combine
+from utils import get_svg_dimensions, naive_combine, resolve_functional_property_ranges
 
-workdir = './src/assets/blocks/10x10x10palette'
+workdir = "./src/assets/blocks/10x10x10palette"
 
-with open(os.path.join(workdir, 'block_set.json')) as file:
+with open(os.path.join(workdir, "block_set.json")) as file:
     block_set = json.load(file)
 
 blocks_by_index = [[None], [None], [None]]
 
 
 def process_blocks():
     processed_blocks = [[], [], []]
-    for block in block_set['blocks']:
-        blocks_by_index[block['index']].append(block)
-
-        _, _, width, height = get_svg_dimensions(block['svgUrl'])
-        processed_blocks[int(block['index'])].append({
-            'index': block['index'],
-            'id': block['id'],
-            'svgUrl': block['svgUrl'],
-            'width': width,
-            'height': height
-        })
+    for block in block_set["blocks"]:
+        blocks_by_index[block["index"]].append(block)
+
+        _, _, width, height = get_svg_dimensions(block["svgUrl"])
+        processed_blocks[int(block["index"])].append(
+            {
+                "index": block["index"],
+                "id": block["id"],
+                "svgUrl": block["svgUrl"],
+                "width": width,
+                "height": height,
+            }
+        )
 
     for blocks in processed_blocks:
-        blocks.sort(key=lambda b: b['id'])
-    block_set['blocks'] = processed_blocks
+        blocks.sort(key=lambda b: b["id"])
+    block_set["blocks"] = processed_blocks
 
 
 def get_smiles(donor, bridge, acceptor):
@@ -42,59 +44,54 @@ def get_smiles(donor, bridge, acceptor):
     blocks = [block for block in blocks if block]
 
     if len(blocks) == 1:
-        return CanonSmiles(blocks[0]['properties']['smiles'])
+        return CanonSmiles(blocks[0]["properties"]["smiles"])
 
     if not donor or not bridge or not acceptor:
-        return ''
+        return ""
 
-    start = chr(ord('A') + (donor['id'] - 1))
-    mid = bridge['id']
-    end = chr(ord('K') + (acceptor['id'] - 1))
+    start = chr(ord("A") + (donor["id"] - 1))
+    mid = bridge["id"]
+    end = chr(ord("K") + (acceptor["id"] - 1))
 
-    filename = workdir + f'/smi/{start}_{mid}_{end}.smi'
+    filename = workdir + f"/smi/{start}_{mid}_{end}.smi"
     with open(filename) as f:
         smiles = f.read().strip()
     return smiles
 
 
 def generate_lookup_table():
-    block_set['table'] = {}
+    block_set["table"] = {}
 
     for donor, bridge, acceptor in itertools.product(*blocks_by_index):
-        d_id = donor['id'] if donor else 0
-        b_id = bridge['id'] if bridge else 0
-        a_id = acceptor['id'] if acceptor else 0
-        key = f'{d_id}:{b_id}:{a_id}'
+        d_id = donor["id"] if donor else 0
+        b_id = bridge["id"] if bridge else 0
+        a_id = acceptor["id"] if acceptor else 0
+        key = f"{d_id}:{b_id}:{a_id}"
 
         smiles = get_smiles(donor, bridge, acceptor)
         chemical_formula = CalcMolFormula(MolFromSmiles(smiles))
 
-        all_smiles = [block['properties']['smiles'] if block else '' for block in (donor, bridge, acceptor)]
-
-        block_set['table'][key] = {
-            'key': key,
-            'chemicalFormula': chemical_formula.replace('+', '').replace('-', ''),
-            'smiles': smiles,
-            'lambdaMaxShift': (
-                (donor['properties']['lambdaMaxShift'] if donor else 0)
-                + (bridge['properties']['lambdaMaxShift'] if bridge else 0)
-                + (acceptor['properties']['lambdaMaxShift'] if acceptor else 0)
+        all_smiles = [
+            block["properties"]["smiles"] if block else ""
+            for block in (donor, bridge, acceptor)
+        ]
+
+        block_set["table"][key] = {
+            "key": key,
+            "chemicalFormula": chemical_formula.replace("+", "").replace("-", ""),
+            "smiles": smiles,
+            "lambdaMaxShift": (
+                (donor["properties"]["lambdaMaxShift"] if donor else 0)
+                + (bridge["properties"]["lambdaMaxShift"] if bridge else 0)
+                + (acceptor["properties"]["lambdaMaxShift"] if acceptor else 0)
             ),
-            'molecularWeight': MolWt(naive_combine(all_smiles))
+            "molecularWeight": MolWt(naive_combine(all_smiles)),
         }
 
 
-def resolve_functional_property_ranges():
-    for prop in block_set['functionalProperties']:
-        all_values = [entry[prop['key']] for entry in block_set['table'].values()
-                      if '0' not in entry['key'].split(':')]
-        prop['min'] = min(all_values)
-        prop['max'] = max(all_values)
-
-
 process_blocks()
 generate_lookup_table()
 resolve_functional_property_ranges()
 
-with open(os.path.join(workdir, 'data.json'), 'w') as file:
+with open(os.path.join(workdir, "data.json"), "w") as file:
     json.dump(block_set, file, indent=2)
@@ -0,0 +1,136 @@
+import json
+import os
+
+import pandas as pd
+
+from utils import get_svg_dimensions, resolve_functional_property_ranges
+
+workdir = "./src/assets/blocks/chem-437"
+
+input_filepath = "/Users/wenqihe2/Documents/projects/dmm/data/chem-437/blocks.xlsx"
+
+block_set = {
+    "id": "Chem_437",
+    "moleculeSize": 3,
+    "labelProperty": {
+        "key": "chemicalFormula",
+        "label": "Chemical Formula",
+        "displayStrategy": "chemicalFormula",
+    },
+    "primaryProperty": {
+        "key": "TPSA",
+        "label": "Total Polar Surface Area",
+        "displayStrategy": "default",
+    },
+    "functionalProperties": [
+        {
+            "key": "TPSA",
+            "label": "Total Polar Surface Area",
+            "displayStrategy": "default",
+        },
+        {
+            "key": "cLogP",
+            "label": "Calculated Partition Coefficient",
+            "displayStrategy": "default",
+        },
+    ],
+    "firstTierProperties": [
+        {
+            "key": "chemicalFormula",
+            "label": "Chemical Formula",
+            "displayStrategy": "chemicalFormula",
+        },
+        {
+            "key": "TPSA",
+            "label": "Total Polar Surface Area",
+            "displayStrategy": "default",
+        },
+    ],
+    "secondTierProperties": [
+        {"key": "smiles", "label": "SMILES", "displayStrategy": "default"},
+        {
+            "key": "molecularWeight",
+            "label": "Molecular Weight",
+            "displayStrategy": "default",
+        },
+    ],
+    "blocks": [[], [], []],
+    "table": {},
+}
+
+
+def add_lookup_entry(key, *, smiles, chemical_formula, molecular_weight, tpsa, c_log_p):
+    """Store one molecule's properties in ``block_set["table"]`` under ``key``.
+
+    An existing entry with the same key is overwritten.
+    """
+    record = dict(
+        key=key,
+        smiles=smiles,
+        chemicalFormula=chemical_formula,
+        molecularWeight=molecular_weight,
+        TPSA=tpsa,
+        cLogP=c_log_p,
+    )
+    block_set["table"][key] = record
+
+
+def process_blocks(df):
+    """Register every row of the blocks sheet as a block and a lookup entry.
+
+    Each row's "Position" value is read as a one-character slot prefix followed
+    by an integer id: "S" maps to slot 0, "M" to slot 1 and any other prefix to
+    slot 2. The block is appended to ``block_set["blocks"][slot]`` and a
+    single-block lookup entry is added under a key whose other two slots are 0.
+    """
+    slot_by_prefix = {"S": 0, "M": 1}
+    for _, row in df.iterrows():
+        pos: str = row["Position"]
+        block_index = slot_by_prefix.get(pos[0], 2)
+        block_id = int(pos[1:])
+
+        svg_path = f"assets/blocks/Chem_437/block_svg/{pos}.svg"
+        _, _, width, height = get_svg_dimensions(svg_path)
+        block = {
+            "index": block_index,
+            "id": block_id,
+            "svgUrl": svg_path,
+            "width": width,
+            "height": height,
+        }
+        block_set["blocks"][block_index].append(block)
+
+        # Only this block's slot carries its id; the other two slots stay 0.
+        key = ":".join(
+            str(block_id) if slot == block_index else "0" for slot in range(3)
+        )
+
+        add_lookup_entry(
+            key,
+            smiles=row["SMILES w/ H replacing RG"],
+            chemical_formula=row["Formula"],
+            molecular_weight=row["MW"],
+            tpsa=row["TPSA"],
+            c_log_p=row["cLogP"],
+        )
+
+
+def process_products(df):
+    """Add a lookup entry for every row of the products sheet.
+
+    The "Start", "Middle" and "End" cells are each read as a one-character
+    prefix followed by an integer id; the three ids joined by ":" form the key.
+    """
+    for _, row in df.iterrows():
+        ids = [int(row[column][1:]) for column in ("Start", "Middle", "End")]
+        key = ":".join(map(str, ids))
+
+        add_lookup_entry(
+            key,
+            smiles=row["SMILES"],
+            chemical_formula=row["Formula"],
+            molecular_weight=row["MW"],
+            tpsa=row["TPSA"],
+            c_log_p=row["cLogP"],
+        )
+
+
+def main():
+    """Build the block set from the input workbook and write it to data.json."""
+    # sheet_name=None loads every sheet into a dict keyed by sheet name.
+    sheets = pd.read_excel(input_filepath, sheet_name=None)
+
+    process_blocks(sheets["Blocks"])
+    process_products(sheets["Products"])
+    resolve_functional_property_ranges()
+
+    output_path = os.path.join(workdir, "data.json")
+    with open(output_path, "w") as f:
+        json.dump(block_set, f, indent=2)
+
+
+if __name__ == "__main__":
+    main()
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,7 @@`
`5`	`5`
`6`	`6`	`import numpy as np`
`7`	`7`
	`8`	`+# workdir = "./src/assets/blocks/opv"`
`8`	`9`	`workdir = "./src/assets/blocks/chem-437"`
`9`	`10`
`10`	`11`
`@@ -48,7 +49,7 @@ def get_arc_bbox(`
`48`	`49`	`(rx**2 * ry**2 - rx**2 * y1_**2 - ry**2 * x1_**2)`
`49`	`50`	`/ (`
`50`	`51`	`rx**2 * y1_**2 + ry**2 * x1_**2`
`51`		`- ), # this could become negative, probably due to rounding error`
	`52`	`+ ),  # this could become negative, probably due to rounding error`
`52`	`53`	`)`
`53`	`54`	`)`
`54`	`55`	`if large_arc_flag == sweep_flag:`