Skip to content

Commit c4e0d76

Browse files
authored
Dom update chemicals (#140)
* fixed #136
* fixed #138
1 parent 7664fdf commit c4e0d76

File tree

9 files changed

+148
-14
lines changed

9 files changed

+148
-14
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ credentials.json
1919
.env
2020
.env.prod
2121
envv/*
22-
ptmd.db
22+
ptmd.db
23+
scripts/

ptmd/boot/file_parsers/parse_chemicals.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"""
33
from __future__ import annotations
44

5-
from pandas import read_excel, DataFrame
5+
from pandas import read_csv, DataFrame
66

77
from ptmd.const import CHEMICALS_FILEPATH, BASE_IDENTIFIER
88

@@ -13,14 +13,12 @@ def parse_chemicals() -> list[dict]:
1313
:return: A list of chemicals.
1414
"""
1515
chemicals: list[dict] = []
16-
chemicals_dataframe: DataFrame = read_excel(CHEMICALS_FILEPATH,
17-
engine='openpyxl',
18-
sheet_name="SUMMARY table of CHEMICALS")
16+
chemicals_dataframe: DataFrame = read_csv(CHEMICALS_FILEPATH, sep=",", encoding='utf-8')
1917
for compound in chemicals_dataframe.itertuples():
2018
chemicals.append({
21-
'common_name': compound.Compound.replace('"', ''),
22-
'ptx_code': int(compound._2.replace('"', '').replace(BASE_IDENTIFIER, '')),
23-
'formula': compound.Formula.replace('"', ''),
24-
'cas': compound._5.replace('"', '').split('\n')[0]
19+
'common_name': compound.compound_name_user.replace('\xa0', ''),
20+
'ptx_code': int(compound.ptx_code.replace(BASE_IDENTIFIER, '')),
21+
'formula': compound.formula,
22+
'cas': compound.cas_neutral
2523
})
2624
return chemicals

ptmd/const/directories.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
SCHEMAS_PATH: str = path.join(DATA_PATH, 'schemas')
1414
EXPOSURE_INFORMATION_SCHEMA_FILEPATH: str = path.join(SCHEMAS_PATH, 'exposure_information_sheet_schema.json')
1515
PARTNERS_LONGNAME_PATH: str = path.join(DATA_PATH, 'data', 'partners.json')
16-
CHEMICALS_FILEPATH: str = path.join(DATA_PATH, 'data', 'chemicals.xlsx')
16+
CHEMICALS_FILEPATH: str = path.join(DATA_PATH, 'data', 'ptx_chem_database_chemicals.csv')
1717
ORGANISMS_FILEPATH: str = path.join(DATA_PATH, 'data', 'organisms.json')
1818
DOWNLOAD_DIRECTORY_PATH: str = path.join(DATA_PATH, 'downloads')
1919

ptmd/lib/isa/core.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030

3131
class Batch2ISA:
32-
""" Class for converting a batch of PTMD data to ISA-Tab format.
32+
""" Class for converting a batch of PTMD data to ISA-json format.
3333
3434
:param file: The file to convert.
3535
"""
@@ -53,7 +53,7 @@ def convert(self) -> list[dict]:
5353
:return: A list of dictionaries containing the ISA investigations.
5454
"""
5555
study: Study = Study(
56-
filename=self.filename,
56+
filename=f's_{self.filename.replace(".xlsx", ".txt")}',
5757
sources=[self.blank_source],
5858
characteristic_categories=[ORGANISM_OA, SEX_OA, REPLICATE_OA, BOX_OA, POSITION_OA],
5959
units=[HOURS_OA]
@@ -235,7 +235,7 @@ def create_source(self, sample_identifier: str) -> Source:
235235
:param sample_identifier: The identifier of the sample.
236236
:return: A source.
237237
"""
238-
source_name: str = f"Source of sample {sample_identifier}"
238+
source_name: str = f"{sample_identifier}_source"
239239

240240
if 'Drosophila_melanogaster' not in self.organism_name:
241241
return Source(name=source_name, characteristics=[

ptmd/resources/data/chemicals.xlsx

-633 KB
Binary file not shown.

ptmd/resources/data/partners.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@
77
"NIH": "National Institutes of Health",
88
"CLU": "Clemson University Research Foundation",
99
"LEITAT": "Acondicionatment tarrasense Associacion",
10-
"UOB": "University of Birmingham"
10+
"UOB": "University of Birmingham",
11+
"MGI": "MGI Tech Co., Ltd."
1112
}

ptmd/resources/data/ptx_chem_database_chemicals.csv

Lines changed: 133 additions & 0 deletions
Large diffs are not rendered by default.
-68.7 KB
Binary file not shown.

tests/test_boot/test_file_parsers/test_parse_chemicals.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ def test_parse_chemical(self):
1212
for key in keys:
1313
self.assertIn(key, chemical.keys())
1414
self.assertEqual(type(chemical), dict)
15+
self.assertIsInstance(chemical['ptx_code'], int)

0 commit comments

Comments
 (0)