# Directory containing this module; the cache log lives beneath it.
dir_path = Path(__file__).parent.resolve()

# Our cache emits warnings for files with unpinned versions that don't match the cache.
# Those warnings are persisted to `logs/cache.log` next to this file, so the
# `logs` directory is created up front (a no-op when it already exists).
dir_path.joinpath("logs").mkdir(exist_ok=True)
logger.add(dir_path.joinpath("logs", "cache.log"), level="WARNING")
21+
2122
class DownloadFileCheckException(RuntimeError):
    """
    Raised when a downloaded file does not match its cached counterpart.

    This is a dedicated subclass so the mismatch can be told apart from any
    other RuntimeError; see Service#download_against_cache for the motivation.
    """
2425
26+
2527@dataclass
2628class Service :
2729 url : str
@@ -34,17 +36,12 @@ def download(self, output: str | PathLike) -> requests.Response:
3436 # As per https://stackoverflow.com/a/39217788/7589775 to enable download streaming.
3537 with requests .get (self .url , stream = True , headers = self .headers ) as response :
3638 response .raw .decode_content = True
37- with open (output , 'wb' ) as f :
39+ with open (output , "wb" ) as f :
3840 shutil .copyfileobj (response .raw , f )
3941 return response
4042
4143 # NOTE: this is slightly yucky code deduplication. The only intended values of `downloaded_file_type` are `pinned` and `unpinned`.
42- def download_against_cache (
43- self ,
44- cache : Path ,
45- downloaded_file_type : str ,
46- move_output : bool
47- ):
44+ def download_against_cache (self , cache : Path , downloaded_file_type : str , move_output : bool ):
4845 """
4946 Downloads `this` Service and checks it against the provided `cache` at path. In logs,
5047 the file will be referred to as `downloaded_file_type`.
@@ -68,21 +65,24 @@ def download_against_cache(
6865 else :
6966 shutil .copy (cache , debug_file_path )
7067 # We use a custom error type to prevent any overlap with RuntimeError. I am not sure if there is any.
71- raise DownloadFileCheckException (f"The { downloaded_file_type } file { downloaded_file_path } and " + \
72- f"cached file originally at { cache } do not match! " + \
73- f"Compare the pinned { downloaded_file_path } and the cached { debug_file_path } ." )
68+ raise DownloadFileCheckException (
69+ f"The { downloaded_file_type } file { downloaded_file_path } and "
70+ + f"cached file originally at { cache } do not match! "
71+ + f"Compare the pinned { downloaded_file_path } and the cached { debug_file_path } ."
72+ )
7473 else :
7574 # Since we don't clean up pinned_file_path for the above branch's debugging,
7675 # we need to clean it up here.
7776 downloaded_file_path .unlink ()
7877
@staticmethod
def coerce(obj: "Service | str") -> "Service":
    """
    Normalize `obj` to a Service.

    A plain string is treated as a URL and wrapped in a new Service;
    anything else is returned unchanged.
    """
    # TODO: This could also be replaced by coercing str to Service in CacheItem via pydantic.
    return Service(url=obj) if isinstance(obj, str) else obj
8584
85+
8686def fetch_biomart_service (xml : str ) -> Service :
8787 """
8888 Access BioMart data through the BioMart REST API:
@@ -91,6 +91,7 @@ def fetch_biomart_service(xml: str) -> Service:
9191 ROOT = "http://www.ensembl.org/biomart/martservice?query="
9292 return Service (ROOT + urllib .parse .quote_plus (xml ))
9393
94+
9495@dataclass
9596class CacheItem :
9697 """
@@ -136,7 +137,7 @@ def download(self, output: str | PathLike):
136137 logger .info (f"Fetching { self .name } ..." )
137138
138139 logger .info (f"Downloading cache { self .cached } to { output } ..." )
139- gdown .download (self .cached , str (output )) # gdown doesn't have a type signature, but it expects a string :/
140+ gdown .download (self .cached , str (output )) # gdown doesn't have a type signature, but it expects a string :/
140141
141142 if self .pinned is not None :
142143 Service .coerce (self .pinned ).download_against_cache (cache = Path (output ), downloaded_file_type = "pinned" , move_output = True )
@@ -148,6 +149,8 @@ def download(self, output: str | PathLike):
148149 logger .warning (err )
149150
150151 # TODO: yikes! same with self.unpinned
152+
153+
# Recursive mapping type: each key maps either to a CacheItem or to a nested
# CacheDirectory. The inner name is a string forward reference because the
# alias refers to itself.
CacheDirectory = dict[str, Union[CacheItem, "CacheDirectory"]]
152155
153156# An *unversioned* directory list.
@@ -255,8 +258,8 @@ def download(self, output: str | PathLike):
255258 name = "KEGG 03250" ,
256259 cached = "https://drive.google.com/uc?id=16dtWKHCQMp2qrLfFDE7nVhbwBCr2H5a9" ,
257260 unpinned = Service (
258- "https://www.kegg.jp/kegg-bin/download?entry=ko03250&format=kgml" ,
259- headers = { 'Referer' : 'https://www.kegg.jp/pathway/ko03250' })
261+ "https://www.kegg.jp/kegg-bin/download?entry=ko03250&format=kgml" , headers = { "Referer" : "https://www.kegg.jp/pathway/ko03250" }
262+ ),
260263 )
261264 },
262265 "HIV1" : {
@@ -266,13 +269,13 @@ def download(self, output: str | PathLike):
266269 "prize_05.tsv" : CacheItem (
267270 name = "HIV_05 prizes" ,
268271 cached = "https://drive.google.com/uc?id=1jVWNRPfYkbqimO44GdzXYB3-7NXhet1m" ,
269- pinned = "https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_05.csv"
272+ pinned = "https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_05.csv" ,
270273 ),
271274 "prize_060.tsv" : CacheItem (
272275 name = "HIV_060 prizes" ,
273276 cached = "https://drive.google.com/uc?id=1Aucgp7pcooGr9oT4m2bvYEuYW6186WxQ" ,
274- pinned = "https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_060.csv"
275- )
277+ pinned = "https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/refs/heads/main/Results/base_analysis/prize_060.csv" ,
278+ ),
276279 },
277280 "iRefIndex" : {
278281 # This can also be obtained from the SPRAS repo, though the SPRAS repo removes self loops. We don't.
@@ -283,42 +286,42 @@ def download(self, output: str | PathLike):
283286 "phosphosite-irefindex13.0-uniprot.txt" : CacheItem (
284287 name = "iRefIndex v13.0 UniProt interactome" ,
285288 cached = "https://drive.google.com/uc?id=1fQ8Z3FjEwUseEtsExO723zj7mAAtdomo" ,
286- pinned = "https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/networks/phosphosite-irefindex13.0-uniprot.txt"
289+ pinned = "https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/networks/phosphosite-irefindex13.0-uniprot.txt" ,
287290 )
288291 },
289292 "OsmoticStress" : {
290293 "yeast_pcsf_network.sif" : CacheItem (
291294 # In the paper https://doi.org/10.1016/j.celrep.2018.08.085
292295 name = "Case Study Edge Results, from Supplementary Data 3" ,
293- cached = "https://drive.google.com/uc?id=1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h"
296+ cached = "https://drive.google.com/uc?id=1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h" ,
294297 ),
295298 # The following files are from https://github.com/gitter-lab/osmotic-stress.
296299 # While the following files do point to the repository's main branch,
297300 # they aren't expected to actually change.
298301 "prizes.txt" : CacheItem (
299302 name = "Osmotic Stress Prizes" ,
300303 pinned = "https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/prizes.txt" ,
301- cached = "https://drive.google.com/uc?id=16WDQs0Vjv6rI12-hbifsbnpH31jMGhJg"
304+ cached = "https://drive.google.com/uc?id=16WDQs0Vjv6rI12-hbifsbnpH31jMGhJg" ,
302305 ),
303306 "ChasmanNetwork-DirUndir.txt" : CacheItem (
304307 name = "Network Input" ,
305308 pinned = "https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/ChasmanNetwork-DirUndir.txt" ,
306- cached = "https://drive.google.com/uc?id=1qYXPaWcPU72YYME7NaBzD7thYCHRzrLH"
309+ cached = "https://drive.google.com/uc?id=1qYXPaWcPU72YYME7NaBzD7thYCHRzrLH" ,
307310 ),
308311 "dummy.txt" : CacheItem (
309312 name = "Dummy Nodes File" ,
310313 pinned = "https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Input%20Data/dummy.txt" ,
311- cached = "https://drive.google.com/uc?id=1dsFIhBrIEahggg0JPxw64JwS51pKxoQU"
314+ cached = "https://drive.google.com/uc?id=1dsFIhBrIEahggg0JPxw64JwS51pKxoQU" ,
312315 ),
313316 "_edgeFreq.eda " : CacheItem (
314317 name = "Case Study Omics Integrator Edge Frequencies" ,
315318 pinned = "https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/Notebooks/Forest-TPS/_edgeFreq.eda" ,
316- cached = "https://drive.google.com/uc?id=1M_rxEzUCo_EVuFyM47OEH2J-4LB3eeCR"
319+ cached = "https://drive.google.com/uc?id=1M_rxEzUCo_EVuFyM47OEH2J-4LB3eeCR" ,
317320 ),
318321 "goldStandardUnionDetailed.txt" : CacheItem (
319322 name = "Gold Standard Reference Pathways" ,
320323 pinned = "https://raw.githubusercontent.com/gitter-lab/osmotic-stress/refs/heads/master/data/evaluation/goldStandardUnionDetailed.txt" ,
321- cached = "https://drive.google.com/uc?id=1-_zF9oKFCNmJbDCC2vq8OM17HJw80s2T"
324+ cached = "https://drive.google.com/uc?id=1-_zF9oKFCNmJbDCC2vq8OM17HJw80s2T" ,
322325 ),
323326 },
324327 "EGFR" : {
@@ -328,19 +331,19 @@ def download(self, output: str | PathLike):
328331 "eight-egfr-reference-all.txt" : CacheItem (
329332 name = "EGFR Gold Standard Reference" ,
330333 pinned = "https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/resources/eight-egfr-reference-all.txt" ,
331- cached = "https://drive.google.com/uc?id=15MqpIbH1GRA1tq0ZXH9oMnKytoFSzXyw"
334+ cached = "https://drive.google.com/uc?id=15MqpIbH1GRA1tq0ZXH9oMnKytoFSzXyw" ,
332335 ),
333336 "egfr-prizes.txt" : CacheItem (
334337 name = "EGFR prizes" ,
335338 pinned = "https://raw.githubusercontent.com/gitter-lab/tps/refs/heads/master/data/pcsf/egfr-prizes.txt" ,
336- cached = "https://drive.google.com/uc?id=1nI5hw-rYRZPs15UJiqokHpHEAabRq6Xj"
337- )
339+ cached = "https://drive.google.com/uc?id=1nI5hw-rYRZPs15UJiqokHpHEAabRq6Xj" ,
340+ ),
338341 },
339342 "Surfaceome" : {
340343 "table_S3_surfaceome.xlsx" : CacheItem (
341344 name = "Human surfaceome" ,
342345 unpinned = "http://wlab.ethz.ch/surfaceome/table_S3_surfaceome.xlsx" ,
343- cached = "https://docs.google.com/uc?id=1cBXYbDnAJVet0lv3BRrizV5FuqfMbBr0"
346+ cached = "https://docs.google.com/uc?id=1cBXYbDnAJVet0lv3BRrizV5FuqfMbBr0" ,
344347 )
345348 },
346349 "TranscriptionFactors" : {
@@ -357,7 +360,7 @@ def download(self, output: str | PathLike):
357360 "pc-biopax.owl.gz" : CacheItem (
358361 name = "PathwayCommons Universal BioPAX file" ,
359362 cached = "https://drive.google.com/uc?id=1R7uE2ky7fGlZThIWCOblu7iqbpC-aRr0" ,
360- pinned = "https://download.baderlab.org/PathwayCommons/PC2/v14/pc-biopax.owl.gz"
363+ pinned = "https://download.baderlab.org/PathwayCommons/PC2/v14/pc-biopax.owl.gz" ,
361364 ),
362365 "pathways.txt.gz" : CacheItem (
363366 name = "PathwayCommons Pathway Identifiers" ,
@@ -367,15 +370,14 @@ def download(self, output: str | PathLike):
367370 "denylist.txt" : CacheItem (
368371 name = "PathwayCommons small molecule denylist" ,
369372 cached = "https://drive.google.com/uc?id=1QmISJXPvVljA8oKuNYRUNbJJvZKPa_-u" ,
370- pinned = "https://download.baderlab.org/PathwayCommons/PC2/v14/blacklist.txt"
373+ pinned = "https://download.baderlab.org/PathwayCommons/PC2/v14/blacklist.txt" ,
371374 ),
372375 "intermediate" : {
373376 "pc-panther-biopax.owl" : CacheItem (
374- name = "PathwayCommons PANTHER-only BioPAX file" ,
375- cached = "https://drive.google.com/uc?id=1MklrD8CJ1BIjh_wWr_g5rrIJ5XJB7FUI"
377+ name = "PathwayCommons PANTHER-only BioPAX file" , cached = "https://drive.google.com/uc?id=1MklrD8CJ1BIjh_wWr_g5rrIJ5XJB7FUI"
376378 )
377- }
378- }
379+ },
380+ },
379381}
380382
381383
@@ -394,7 +396,9 @@ def get_cache_item(path: list[str]) -> CacheItem:
394396
395397 # Google Drive validation. TODO: remove if move to OSDF.
396398 if "uc?id=" not in current_item .cached or "/view?usp=sharing" in current_item .cached :
397- raise RuntimeError ("Make sure your Google Drive URLs are in https://drive.google.com/uc?id=... format " + \
398- "with no /view?usp=sharing at the end. See CONTRIBUTING.md for more info." )
399+ raise RuntimeError (
400+ "Make sure your Google Drive URLs are in https://drive.google.com/uc?id=... format "
401+ + "with no /view?usp=sharing at the end. See CONTRIBUTING.md for more info."
402+ )
399403
400404 return current_item
0 commit comments