Skip to content

Commit e804da2

Browse files
committed
style: fmt
1 parent 2ff30da commit e804da2

File tree

23 files changed

+204
-155
lines changed

23 files changed

+204
-155
lines changed

cache/__init__.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
dir_path = Path(os.path.dirname(os.path.realpath(__file__)))
1515
artifacts_dir = dir_path / "artifacts"
1616

17+
1718
def get_artifact_name(directive: list[str]) -> str:
1819
return quote_plus("/".join(directive))
1920

21+
2022
def has_expired(directive: list[str]) -> bool:
2123
"""
2224
Check if the artifact metadata associated with a directive has expired.
@@ -25,29 +27,30 @@ def has_expired(directive: list[str]) -> bool:
2527
artifact_name = get_artifact_name(directive)
2628
cache_item = get_cache_item(directive)
2729

28-
metadata_dir = artifacts_dir / 'metadata'
30+
metadata_dir = artifacts_dir / "metadata"
2931
metadata_dir.mkdir(exist_ok=True)
30-
metadata_file = (artifacts_dir / 'metadata' / artifact_name).with_suffix((artifacts_dir / artifact_name).suffix + '.metadata')
32+
metadata_file = (artifacts_dir / "metadata" / artifact_name).with_suffix((artifacts_dir / artifact_name).suffix + ".metadata")
3133

3234
# metadata never existed: we need to retrieve the new file
3335
if not metadata_file.exists():
34-
with open(metadata_file, 'wb') as f:
36+
with open(metadata_file, "wb") as f:
3537
pickle.dump(cache_item, f)
3638
return True
3739

3840
old_cache_item = None
39-
with open(metadata_file, 'rb') as f:
41+
with open(metadata_file, "rb") as f:
4042
old_cache_item = pickle.load(f)
4143

4244
# metadata expired: re-retrieve the item
4345
if old_cache_item != cache_item:
44-
with open(metadata_file, 'wb') as f:
46+
with open(metadata_file, "wb") as f:
4547
pickle.dump(cache_item, f)
4648
return True
4749

4850
# metadata hasn't changed and already existed: this hasn't expired
4951
return False
5052

53+
5154
def link(output: str, directive: list[str], uncompress=False):
5255
"""
5356
Links output files from cache.directory directives.
@@ -74,7 +77,7 @@ def link(output: str, directive: list[str], uncompress=False):
7477
cache_item.download(artifacts_dir / artifact_name)
7578

7679
if uncompress:
77-
uncompressed_artifact_path = Path(str(artifacts_dir / artifact_name) + '.uncompressed')
80+
uncompressed_artifact_path = Path(str(artifacts_dir / artifact_name) + ".uncompressed")
7881
uncompressed_artifact_path.unlink(missing_ok=True)
7982
uncompress_file(artifacts_dir / artifact_name, uncompressed_artifact_path)
8083
Path(output).symlink_to(uncompressed_artifact_path)

cache/cli.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,21 @@
1010
import argparse
1111
from cache.directory import get_cache_item
1212

13+
1314
def parse_args():
14-
parser = argparse.ArgumentParser(
15-
prog='Cache',
16-
description='CLI utility for directory.py')
17-
parser.add_argument('path')
18-
parser.add_argument('output')
15+
parser = argparse.ArgumentParser(prog="Cache", description="CLI utility for directory.py")
16+
parser.add_argument("path")
17+
parser.add_argument("output")
1918

2019
return parser.parse_args()
2120

21+
2222
def main():
2323
args = parse_args()
2424
cache_item = get_cache_item(args.path.split("/"))
2525

2626
cache_item.download(args.output)
2727

28+
2829
if __name__ == "__main__":
2930
main()

cache/directory.py

Lines changed: 42 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
dir_path = Path(__file__).parent.resolve()
1717

1818
# Our cache emits warnings for files with unpinned versions that don't match the cache.
19-
(dir_path / 'logs').mkdir(exist_ok=True)
20-
logger.add(dir_path / 'logs' / "cache.log", level="WARNING")
19+
(dir_path / "logs").mkdir(exist_ok=True)
20+
logger.add(dir_path / "logs" / "cache.log", level="WARNING")
21+
2122

2223
class DownloadFileCheckException(RuntimeError):
2324
"""See Service#download_against_cache for some motivation for this custom error"""
2425

26+
2527
@dataclass
2628
class Service:
2729
url: str
@@ -34,17 +36,12 @@ def download(self, output: str | PathLike) -> requests.Response:
3436
# As per https://stackoverflow.com/a/39217788/7589775 to enable download streaming.
3537
with requests.get(self.url, stream=True, headers=self.headers) as response:
3638
response.raw.decode_content = True
37-
with open(output, 'wb') as f:
39+
with open(output, "wb") as f:
3840
shutil.copyfileobj(response.raw, f)
3941
return response
4042

4143
# NOTE: this is slightly yucky code deduplication. The only intended values of `downloaded_file_type` are `pinned` and `unpinned`.
42-
def download_against_cache(
43-
self,
44-
cache: Path,
45-
downloaded_file_type: str,
46-
move_output: bool
47-
):
44+
def download_against_cache(self, cache: Path, downloaded_file_type: str, move_output: bool):
4845
"""
4946
Downloads this Service and checks it against the file at the provided `cache` path. In logs,
5047
the file will be referred to as `downloaded_file_type`.
@@ -68,21 +65,24 @@ def download_against_cache(
6865
else:
6966
shutil.copy(cache, debug_file_path)
7067
# We use a custom error type to prevent any overlap with RuntimeError. I am not sure if there is any.
71-
raise DownloadFileCheckException(f"The {downloaded_file_type} file {downloaded_file_path} and " + \
72-
f"cached file originally at {cache} do not match! " + \
73-
f"Compare the pinned {downloaded_file_path} and the cached {debug_file_path}.")
68+
raise DownloadFileCheckException(
69+
f"The {downloaded_file_type} file {downloaded_file_path} and "
70+
+ f"cached file originally at {cache} do not match! "
71+
+ f"Compare the pinned {downloaded_file_path} and the cached {debug_file_path}."
72+
)
7473
else:
7574
# Since we don't clean up downloaded_file_path in the branch above (it is kept for debugging),
7675
# we need to clean it up here.
7776
downloaded_file_path.unlink()
7877

7978
@staticmethod
80-
def coerce(obj: 'Service | str') -> 'Service':
79+
def coerce(obj: "Service | str") -> "Service":
8180
# TODO: This could also be replaced by coercing str to Service in CacheItem via pydantic.
8281
if isinstance(obj, str):
8382
return Service(url=obj)
8483
return obj
8584

85+
8686
def fetch_biomart_service(xml: str) -> Service:
8787
"""
8888
Access BioMart data through the BioMart REST API:
@@ -91,6 +91,7 @@ def fetch_biomart_service(xml: str) -> Service:
9191
ROOT = "http://www.ensembl.org/biomart/martservice?query="
9292
return Service(ROOT + urllib.parse.quote_plus(xml))
9393

94+
9495
@dataclass
9596
class CacheItem:
9697
"""
@@ -136,7 +137,7 @@ def download(self, output: str | PathLike):
136137
logger.info(f"Fetching {self.name}...")
137138

138139
logger.info(f"Downloading cache {self.cached} to {output}...")
139-
gdown.download(self.cached, str(output)) # gdown doesn't have a type signature, but it expects a string :/
140+
gdown.download(self.cached, str(output)) # gdown doesn't have a type signature, but it expects a string :/
140141

141142
if self.pinned is not None:
142143
Service.coerce(self.pinned).download_against_cache(cache=Path(output), downloaded_file_type="pinned", move_output=True)
@@ -148,6 +149,8 @@ def download(self, output: str | PathLike):
148149
logger.warning(err)
149150

150151
# TODO: yikes! same with self.unpinned
152+
153+
151154
CacheDirectory = dict[str, Union[CacheItem, "CacheDirectory"]]
152155

153156
# An *unversioned* directory list.
@@ -255,8 +258,8 @@ def download(self, output: str | PathLike):
255258
name="KEGG 03250",
256259
cached="https://drive.google.com/uc?id=16dtWKHCQMp2qrLfFDE7nVhbwBCr2H5a9",
257260
unpinned=Service(
258-
"https://www.kegg.jp/kegg-bin/download?entry=ko03250&format=kgml",
259-
headers={'Referer': 'https://www.kegg.jp/pathway/ko03250'})
261+
"https://www.kegg.jp/kegg-bin/download?entry=ko03250&format=kgml", headers={"Referer": "https://www.kegg.jp/pathway/ko03250"}
262+
),
260263
)
261264
},
262265
"HIV1": {
@@ -266,13 +269,13 @@ def download(self, output: str | PathLike):
266269
"prize_05.tsv": CacheItem(
267270
name="HIV_05 prizes",
268271
cached="https://drive.google.com/uc?id=1jVWNRPfYkbqimO44GdzXYB3-7NXhet1m",
269-
pinned="https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_05.csv"
272+
pinned="https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_05.csv",
270273
),
271274
"prize_060.tsv": CacheItem(
272275
name="HIV_060 prizes",
273276
cached="https://drive.google.com/uc?id=1Aucgp7pcooGr9oT4m2bvYEuYW6186WxQ",
274-
pinned="https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_060.csv"
275-
)
277+
pinned="https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_060.csv",
278+
),
276279
},
277280
"iRefIndex": {
278281
# This can also be obtained from the SPRAS repo, though the SPRAS repo removes self loops. We don't.
@@ -283,42 +286,42 @@ def download(self, output: str | PathLike):
283286
"phosphosite-irefindex13.0-uniprot.txt": CacheItem(
284287
name="iRefIndex v13.0 UniProt interactome",
285288
cached="https://drive.google.com/uc?id=1fQ8Z3FjEwUseEtsExO723zj7mAAtdomo",
286-
pinned="https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/networks/phosphosite-irefindex13.0-uniprot.txt"
289+
pinned="https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/networks/phosphosite-irefindex13.0-uniprot.txt",
287290
)
288291
},
289292
"OsmoticStress": {
290293
"yeast_pcsf_network.sif": CacheItem(
291294
# In the paper https://doi.org/10.1016/j.celrep.2018.08.085
292295
name="Case Study Edge Results, from Supplementary Data 3",
293-
cached="https://drive.google.com/uc?id=1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h"
296+
cached="https://drive.google.com/uc?id=1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h",
294297
),
295298
# The following files are from https://github.com/gitter-lab/osmotic-stress.
296299
# While the following files do point to the repository's main branch,
297300
# they aren't expected to actually change.
298301
"prizes.txt": CacheItem(
299302
name="Osmotic Stress Prizes",
300303
pinned="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/prizes.txt",
301-
cached="https://drive.google.com/uc?id=16WDQs0Vjv6rI12-hbifsbnpH31jMGhJg"
304+
cached="https://drive.google.com/uc?id=16WDQs0Vjv6rI12-hbifsbnpH31jMGhJg",
302305
),
303306
"ChasmanNetwork-DirUndir.txt": CacheItem(
304307
name="Network Input",
305308
pinned="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/ChasmanNetwork-DirUndir.txt",
306-
cached="https://drive.google.com/uc?id=1qYXPaWcPU72YYME7NaBzD7thYCHRzrLH"
309+
cached="https://drive.google.com/uc?id=1qYXPaWcPU72YYME7NaBzD7thYCHRzrLH",
307310
),
308311
"dummy.txt": CacheItem(
309312
name="Dummy Nodes File",
310313
pinned="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/dummy.txt",
311-
cached="https://drive.google.com/uc?id=1dsFIhBrIEahggg0JPxw64JwS51pKxoQU"
314+
cached="https://drive.google.com/uc?id=1dsFIhBrIEahggg0JPxw64JwS51pKxoQU",
312315
),
313316
"_edgeFreq.eda ": CacheItem(
314317
name="Case Study Omics Integrator Edge Frequencies",
315318
pinned="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Notebooks/Forest-TPS/_edgeFreq.eda",
316-
cached="https://drive.google.com/uc?id=1M_rxEzUCo_EVuFyM47OEH2J-4LB3eeCR"
319+
cached="https://drive.google.com/uc?id=1M_rxEzUCo_EVuFyM47OEH2J-4LB3eeCR",
317320
),
318321
"goldStandardUnionDetailed.txt": CacheItem(
319322
name="Gold Standard Reference Pathways",
320323
pinned="https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/data/evaluation/goldStandardUnionDetailed.txt",
321-
cached="https://drive.google.com/uc?id=1-_zF9oKFCNmJbDCC2vq8OM17HJw80s2T"
324+
cached="https://drive.google.com/uc?id=1-_zF9oKFCNmJbDCC2vq8OM17HJw80s2T",
322325
),
323326
},
324327
"EGFR": {
@@ -328,19 +331,19 @@ def download(self, output: str | PathLike):
328331
"eight-egfr-reference-all.txt": CacheItem(
329332
name="EGFR Gold Standard Reference",
330333
pinned="https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/resources/eight-egfr-reference-all.txt",
331-
cached="https://drive.google.com/uc?id=15MqpIbH1GRA1tq0ZXH9oMnKytoFSzXyw"
334+
cached="https://drive.google.com/uc?id=15MqpIbH1GRA1tq0ZXH9oMnKytoFSzXyw",
332335
),
333336
"egfr-prizes.txt": CacheItem(
334337
name="EGFR prizes",
335338
pinned="https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/pcsf/egfr-prizes.txt",
336-
cached="https://drive.google.com/uc?id=1nI5hw-rYRZPs15UJiqokHpHEAabRq6Xj"
337-
)
339+
cached="https://drive.google.com/uc?id=1nI5hw-rYRZPs15UJiqokHpHEAabRq6Xj",
340+
),
338341
},
339342
"Surfaceome": {
340343
"table_S3_surfaceome.xlsx": CacheItem(
341344
name="Human surfaceome",
342345
unpinned="http://wlab.ethz.ch/surfaceome/table_S3_surfaceome.xlsx",
343-
cached="https://docs.google.com/uc?id=1cBXYbDnAJVet0lv3BRrizV5FuqfMbBr0"
346+
cached="https://docs.google.com/uc?id=1cBXYbDnAJVet0lv3BRrizV5FuqfMbBr0",
344347
)
345348
},
346349
"TranscriptionFactors": {
@@ -357,7 +360,7 @@ def download(self, output: str | PathLike):
357360
"pc-biopax.owl.gz": CacheItem(
358361
name="PathwayCommons Universal BioPAX file",
359362
cached="https://drive.google.com/uc?id=1R7uE2ky7fGlZThIWCOblu7iqbpC-aRr0",
360-
pinned="https://download.baderlab.org/PathwayCommons/PC2/v14/pc-biopax.owl.gz"
363+
pinned="https://download.baderlab.org/PathwayCommons/PC2/v14/pc-biopax.owl.gz",
361364
),
362365
"pathways.txt.gz": CacheItem(
363366
name="PathwayCommons Pathway Identifiers",
@@ -367,15 +370,14 @@ def download(self, output: str | PathLike):
367370
"denylist.txt": CacheItem(
368371
name="PathwayCommons small molecule denylist",
369372
cached="https://drive.google.com/uc?id=1QmISJXPvVljA8oKuNYRUNbJJvZKPa_-u",
370-
pinned="https://download.baderlab.org/PathwayCommons/PC2/v14/blacklist.txt"
373+
pinned="https://download.baderlab.org/PathwayCommons/PC2/v14/blacklist.txt",
371374
),
372375
"intermediate": {
373376
"pc-panther-biopax.owl": CacheItem(
374-
name="PathwayCommons PANTHER-only BioPAX file",
375-
cached="https://drive.google.com/uc?id=1MklrD8CJ1BIjh_wWr_g5rrIJ5XJB7FUI"
377+
name="PathwayCommons PANTHER-only BioPAX file", cached="https://drive.google.com/uc?id=1MklrD8CJ1BIjh_wWr_g5rrIJ5XJB7FUI"
376378
)
377-
}
378-
}
379+
},
380+
},
379381
}
380382

381383

@@ -394,7 +396,9 @@ def get_cache_item(path: list[str]) -> CacheItem:
394396

395397
# Google Drive validation. TODO: remove this check if we move to OSDF.
396398
if "uc?id=" not in current_item.cached or "/view?usp=sharing" in current_item.cached:
397-
raise RuntimeError("Make sure your Google Drive URLs are in https://drive.google.com/uc?id=... format " + \
398-
"with no /view?usp=sharing at the end. See CONTRIBUTING.md for more info.")
399+
raise RuntimeError(
400+
"Make sure your Google Drive URLs are in https://drive.google.com/uc?id=... format "
401+
+ "with no /view?usp=sharing at the end. See CONTRIBUTING.md for more info."
402+
)
399403

400404
return current_item

datasets/contributing/raw_generation.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@
66
import uuid
77
import pandas
88

9+
910
def random_id() -> str:
1011
return uuid.uuid4().hex
1112

13+
1214
def assign_ids(graph: networkx.DiGraph) -> networkx.DiGraph:
1315
"""Assigns new IDs to a graph based on `random_id`"""
1416
mapping = {node: random_id() for node in graph}
1517
return networkx.relabel_nodes(graph, mapping)
1618

19+
1720
def gnp_noise(graph: networkx.DiGraph, p: float):
1821
"""
1922
The mutating (in-place) equivalent of networkx.gnp_random_graph,
@@ -23,8 +26,9 @@ def gnp_noise(graph: networkx.DiGraph, p: float):
2326
if random.random() < p:
2427
graph.add_edge(*e)
2528

29+
2630
def generate_parser():
27-
parser = argparse.ArgumentParser(prog='Pathway generator')
31+
parser = argparse.ArgumentParser(prog="Pathway generator")
2832
parser.add_argument("--path-count", type=int, default=10)
2933
parser.add_argument("--path-length", type=int, default=7)
3034

@@ -39,6 +43,7 @@ def generate_parser():
3943
parser.add_argument("--interactome-output", type=str, default="interactome.tsv")
4044
return parser
4145

46+
4247
def main():
4348
args = generate_parser().parse_args()
4449

@@ -66,13 +71,14 @@ def main():
6671
gold_standard = pandas.DataFrame(((a, b) for a, b, _data in networkx.to_edgelist(graph)), columns=["Source", "Target"])
6772
# We make the gold standard output a little annoying to force some post-processing with pandas.
6873
gold_standard.insert(1, "Interaction-Type", "pp")
69-
gold_standard.to_csv(args.gold_standard_output, index=False, sep='\t')
74+
gold_standard.to_csv(args.gold_standard_output, index=False, sep="\t")
7075

7176
# and we'll follow along similarly to above to build our interactome.
7277
graph.add_nodes_from((random_id() for _ in range(args.interactome_extra_nodes)))
7378
gnp_noise(graph, args.interactome_noise)
7479
interactome = pandas.DataFrame(((a, b) for a, b, _data in networkx.to_edgelist(graph)), columns=["Source", "Target"])
75-
interactome.to_csv(args.interactome_output, index=False, sep='\t')
80+
interactome.to_csv(args.interactome_output, index=False, sep="\t")
81+
7682

7783
if __name__ == "__main__":
7884
main()

datasets/diseases/scripts/interactome.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
diseases_path = Path(__file__).parent.parent.resolve()
55

6+
67
def main():
78
# See /cache/directory.py for information on how this was grabbed.
89
# 9606 is the organism code for homo sapiens and the required background interactome of DISEASES.
@@ -15,5 +16,6 @@ def main():
1516
(diseases_path / "processed").mkdir(exist_ok=True)
1617
string.to_csv(diseases_path / "processed" / "string_interactome.tsv", sep="\t", index=False, header=False)
1718

19+
1820
if __name__ == "__main__":
1921
main()

0 commit comments

Comments
 (0)