Skip to content

Commit 5f61e1a

Browse files
authored
Facilitate cache directory override
1 parent 5b38cf2 commit 5f61e1a

File tree

2 files changed

+16
-10
lines changed

2 files changed

+16
-10
lines changed

maayanlab_bioinformatics/harmonization/ncbi_genes.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from maayanlab_bioinformatics.utils import fetch_save_read
44

55
@lru_cache()
6-
def ncbi_genes_fetch(organism='Mammalia/Homo_sapiens', filters=lambda ncbi: ncbi['type_of_gene']=='protein-coding'):
6+
def ncbi_genes_fetch(organism='Mammalia/Homo_sapiens', filters=lambda ncbi: ncbi['type_of_gene']=='protein-coding', cache_dir=''):
77
''' Fetch the current NCBI Human Gene Info database.
88
See ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/ for the directory/file of the organism of interest.
99
'''
@@ -28,6 +28,7 @@ def supplement_dbXref_prefix_omitted(ids):
2828
'ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/{}.gene_info.gz'.format(organism),
2929
'{}.gene_info.tsv'.format(organism),
3030
sep='\t',
31+
cache_dir=cache_dir,
3132
)
3233
if filters and callable(filters):
3334
ncbi = ncbi[filters(ncbi)]
@@ -47,15 +48,15 @@ def supplement_dbXref_prefix_omitted(ids):
4748
return ncbi
4849

4950
@lru_cache()
50-
def ncbi_genes_lookup(organism='Mammalia/Homo_sapiens', filters=lambda ncbi: ncbi['type_of_gene']=='protein-coding'):
51+
def ncbi_genes_lookup(organism='Mammalia/Homo_sapiens', filters=lambda ncbi: ncbi['type_of_gene']=='protein-coding', cache_dir=''):
5152
''' Return a lookup dictionary with synonyms as the keys, and official symbols as the values
5253
Usage:
5354
```python
5455
ncbi_lookup = ncbi_genes_lookup('Mammalia/Homo_sapiens')
5556
print(ncbi_lookup('STAT3')) # any alias will get converted into the official symbol
5657
```
5758
'''
58-
ncbi_genes = ncbi_genes_fetch(organism=organism, filters=filters)
59+
ncbi_genes = ncbi_genes_fetch(organism=organism, filters=filters, cache_dir=cache_dir)
5960
synonyms, symbols = zip(*{
6061
(synonym, gene_info['Symbol'])
6162
for _, gene_info in ncbi_genes.iterrows()
Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
import os
22
import pandas as pd
33

4-
def fetch_save_read(url, file, reader=pd.read_csv, sep=',', **kwargs):
4+
def fetch_save_read(url, file, cache_dir='', reader=pd.read_csv, sep=',', **kwargs):
55
''' Download file from {url}, save it to {file}, and subsequently read it with {reader} using pandas options on {**kwargs}.
66
'''
7-
if not os.path.exists(file):
8-
if os.path.dirname(file):
9-
os.makedirs(os.path.dirname(file), exist_ok=True)
10-
df = reader(url, sep=sep, index_col=None)
11-
df.to_csv(file, sep=sep, index=False)
12-
return pd.read_csv(file, sep=sep, **kwargs)
7+
if cache_dir or cache_dir == '':
8+
path = file if cache_dir == '' else cache_dir + os.path.sep + file
9+
if not os.path.exists(path):
10+
if os.path.dirname(path):
11+
os.makedirs(os.path.dirname(path), exist_ok=True)
12+
df = reader(url, sep=sep, index_col=None)
13+
df.to_csv(path, sep=sep, index=False)
14+
return reader(path, sep=sep, **kwargs)
15+
else:
16+
return reader(url, sep=sep, **kwargs)
17+

0 commit comments

Comments
 (0)