Skip to content

Commit 72e7036

Browse files
Add new tests
1 parent d455890 commit 72e7036

File tree

2 files changed

+55
-5
lines changed

2 files changed

+55
-5
lines changed

chemicalx/data/datasetloader.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,65 @@
88

99

1010
class DatasetLoader:
11+
"""
12+
General dataset loader for the integrated drug pair scoring datasets.
13+
"""
14+
1115
def __init__(self, dataset_name: str):
    """
    Set up a loader for one of the supported datasets.

    Args:
        dataset_name (str): The name of the dataset, either "drugcombdb" or "drugcomb".
    Raises:
        AssertionError: If ``dataset_name`` is not one of the supported names.
    """
    supported_names = ("drugcombdb", "drugcomb")
    # Explicit raise rather than an ``assert`` statement: asserts are removed when
    # Python runs with ``-O``, which would let an unsupported name through unnoticed.
    # The exception type is kept so callers observe the same failure as before.
    if dataset_name not in supported_names:
        raise AssertionError(f"Unknown dataset {dataset_name!r}; expected one of {supported_names}.")
    self.base_url = "https://raw.githubusercontent.com/AstraZeneca/chemicalx/main/dataset"
    self.dataset_name = dataset_name
1523

1624
def generate_path(self, file_name: str) -> str:
    """
    Build the full url of a file belonging to the selected dataset.

    The url is the base url, the dataset name and the file name joined by "/".

    Args:
        file_name (str): Name of the data file.
    Returns:
        data_path (str): The complete url of the requested file.
    """
    url_parts = (self.base_url, self.dataset_name, file_name)
    return "/".join(url_parts)
1935

2036
def load_raw_json_data(self, path: str) -> Dict:
    """
    Fetch a JSON document from a url and parse it.

    Args:
        path (str): The url of the JSON file.
    Returns:
        raw_data (dict): The parsed content of the file.
    """
    # The response is fully read inside the ``with`` block, so it can be closed
    # before the payload is decoded and parsed.
    with urllib.request.urlopen(path) as response:
        payload = response.read()
    return json.loads(payload.decode())
2448

2549
def load_raw_csv_data(self, path: str) -> pd.DataFrame:
    """
    Read the labeled triples CSV into memory.

    Args:
        path (str): The url of the triples CSV file.
    Returns:
        raw_data (pd.DataFrame): A pandas DataFrame with the data; the ``drug_1``,
            ``drug_2`` and ``context`` columns are read as strings and ``label`` as float.
    """
    # Use a context manager so the response is closed once its content has been
    # read, instead of leaving the connection open until garbage collection.
    with urllib.request.urlopen(path) as response:
        data_bytes = response.read()
    types = {"drug_1": str, "drug_2": str, "context": str, "label": float}
    raw_data = pd.read_csv(io.BytesIO(data_bytes), encoding="utf8", sep=",", dtype=types)
    return raw_data
3062

3163
def get_context_features(self):
64+
"""
65+
Reading the context feature set.
66+
67+
Returns:
68+
context_feature_set (ContextFeatureSet): The ContextFeatureSet of the dataset of interest.
69+
"""
3270
path = self.generate_path("context_set.json")
3371
raw_data = self.load_raw_json_data(path)
3472
raw_data = {k: np.array(v) for k, v in raw_data.items()}
@@ -37,6 +75,12 @@ def get_context_features(self):
3775
return context_feature_set
3876

3977
def get_drug_features(self):
78+
"""
79+
Reading the drug feature set.
80+
81+
Returns:
82+
drug_feature_set (DrugFeatureSet): The DrugFeatureSet of the dataset of interest.
83+
"""
4084
path = self.generate_path("drug_set.json")
4185
raw_data = self.load_raw_json_data(path)
4286
raw_data = {k: {"smiles": v["smiles"], "features": np.array(v["features"])} for k, v in raw_data.items()}
@@ -45,8 +89,14 @@ def get_drug_features(self):
4589
return drug_feature_set
4690

4791
def get_labeled_triples(self):
    """
    Load the labeled triples of the selected dataset.

    The "labeled_triples.csv" file of the dataset is fetched as a DataFrame
    and used to populate a fresh LabeledTriples object.

    Returns:
        labeled_triples (LabeledTriples): The labeled triples in the dataset.
    """
    csv_url = self.generate_path("labeled_triples.csv")
    triples_frame = self.load_raw_csv_data(csv_url)
    labeled_triples = LabeledTriples()
    labeled_triples.update_from_pandas(triples_frame)
    return labeled_triples

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,13 @@
3636
setup(
3737
name="chemicalx",
3838
packages=find_packages(),
39-
version="0.0.4",
39+
version="0.0.5",
4040
license="Apache License, Version 2.0",
4141
description="A Deep Learning Library for Drug Pair Scoring.",
4242
author="Benedek Rozemberczki and Charles Hoyt",
4343
author_email="",
4444
url="https://github.com/AstraZeneca/chemicalx",
45-
download_url="https://github.com/AstraZeneca/chemicalx/archive/v0.0.4.tar.gz",
45+
download_url="https://github.com/AstraZeneca/chemicalx/archive/v0.0.5.tar.gz",
4646
keywords=keywords,
4747
install_requires=install_requires,
4848
setup_requires=setup_requires,

0 commit comments

Comments
 (0)