-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcompound_ds_writer.py
More file actions
58 lines (52 loc) · 2.2 KB
/
compound_ds_writer.py
File metadata and controls
58 lines (52 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import os
from pathlib import Path
# Directory (relative to the working directory) that receives the CSV files.
CSV_FILE_FOLDER = Path("data")

# File-name template; the ``{smiles}`` placeholder is filled in per query.
CSV_FILE_NAME = "{smiles}_similarity_data.csv"

# Column names used for the DataFrame, in the order they are written.
# Keep these in sync with the keys of the row dictionaries placed in the
# DataFrame and with the header of the CSV file that gets created.
columns = [
    "cid",
    "iupac_name",
    "smiles_canonical",
    "molecular_formula",
    "hydrogen_bond_acceptors",
    "hydrogen_bond_donors",
    "rotatable_bonds",
    "log_p",
    "molecular_weight",
    "heavy_atom_count",
    "polar_surface_area",
    "atom_surface_coefficient",
    "no_lipinski_violations",
]
def flatten_parsed_info(compound_properties):
    """Build a flat CSV row dictionary from parsed compound properties.

    Args:
        compound_properties: Mapping of parsed compound properties. Only
            its ``get`` method is used, so absent keys are tolerated.

    Returns:
        A new dict with one entry per output column, in column order.
        Every property that is absent from ``compound_properties`` is
        represented by an empty string, so a missing value shows up as an
        empty CSV cell rather than as a stringified placeholder object.
    """
    output_fields = (
        "cid",
        "iupac_name",
        "smiles_canonical",
        "molecular_formula",
        "hydrogen_bond_acceptors",
        "hydrogen_bond_donors",
        "rotatable_bonds",
        "log_p",
        "molecular_weight",
        "heavy_atom_count",
        "polar_surface_area",
        "atom_surface_coefficient",
        "no_lipinski_violations",
    )
    # Only this output column is read from a differently named input key.
    source_key_overrides = {"smiles_canonical": "smiles"}

    return {
        field: compound_properties.get(source_key_overrides.get(field, field), "")
        for field in output_fields
    }
def write_to_csv(smiles, compound_properties):
    """Append one compound row to the CSV file associated with ``smiles``.

    The target file lives in ``CSV_FILE_FOLDER`` and is named from
    ``CSV_FILE_NAME``. The header row is written only when the file does
    not exist yet; later calls append rows without a header.

    Args:
        smiles: SMILES string identifying the query; used to build the
            file name.
        compound_properties: Parsed compound properties for one compound.
            When falsy (``None`` or empty), nothing is written and a
            failure message is printed instead.

    Returns:
        None.
    """
    CSV_FILE_FOLDER.mkdir(exist_ok=True)

    if not compound_properties:
        print("Failed to parse the JSON data.")
        return

    # SMILES uses "/" and "\" as bond-direction markers. Left as-is, a path
    # separator would make the name resolve into sub-directories (or to an
    # absolute path), so the write would fail or escape CSV_FILE_FOLDER.
    # Only the separators of the current platform are percent-encoded, so
    # names that were already valid stay exactly the same.
    safe_smiles = str(smiles)
    for separator in (os.sep, os.altsep):
        if separator:
            safe_smiles = safe_smiles.replace(separator, f"%{ord(separator):02X}")

    file = CSV_FILE_FOLDER / CSV_FILE_NAME.format(smiles=safe_smiles)

    row = flatten_parsed_info(compound_properties)
    df = pd.DataFrame([row], columns=columns)

    # Append mode creates the file when it is missing; the header flag is
    # evaluated before the file is opened, so it is emitted exactly once.
    df.to_csv(file, mode="a", header=not file.exists(), index=False)