Skip to content

Commit 4752341

Browse files
committed
Assemble single-cell data in an AnnData object
1 parent da5871a commit 4752341

File tree

3 files changed

+60
-4
lines changed

3 files changed

+60
-4
lines changed

gemmapy/gemmapy_api.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@
77
import logging
88
import os
99
import subprocess
10+
import tarfile
11+
import tempfile
1012
import warnings
1113
from getpass import getpass
12-
from io import StringIO
13-
from typing import Optional, List, Callable
14+
from io import StringIO, BytesIO
15+
from os.path import join
16+
from typing import Optional, List, Callable, Any
1417

1518
import anndata as ad
1619
import numpy as np
1720
import pandas as pd
21+
import scanpy
1822
from anndata import AnnData
1923
from pandas import DataFrame
2024

@@ -1667,7 +1671,45 @@ def make_anndata(pack):
16671671
pass
16681672
return out
16691673

1670-
def get_differential_expression_values(self,
1674+
def get_single_cell_dataset_object(self, dataset: str | int,
                                   download_dir=None) -> AnnData:
    """
    Assemble the single-cell expression data of a dataset in one AnnData.

    The data is obtained as a TAR archive from
    ``self.raw.get_dataset_single_cell_expression``, extracted to a
    temporary directory, and every top-level entry of the archive is read
    as a 10x MEX directory. The per-sample objects are then concatenated.

    :param dataset: Identifier of the dataset (string or integer); it is
                    passed unchanged to the raw API.
    :param download_dir: Directory where datasets can be downloaded, or else
                         the data will be retrieved in-memory. A previously
                         downloaded ``<dataset>.tar`` found in this directory
                         is reused instead of being downloaded again.
    :return: An AnnData object combining all the samples of the dataset.
    """
    # The identifier may be an int; file names need a string.
    dataset_label = str(dataset)

    def resolve():
        """Return a binary file-like object over the dataset TAR archive."""
        if download_dir:
            dest = join(download_dir, dataset_label + '.tar')
            if not os.path.exists(dest):
                logger.info('Downloading single-cell data for %s to %s...',
                            dataset_label, download_dir)
                # Download before touching the filesystem, then publish the
                # file atomically: a failed or interrupted download must
                # never leave a truncated archive that later calls reuse.
                # NOTE(review): assumes the raw API returns bytes - confirm.
                payload = self.raw.get_dataset_single_cell_expression(
                    dataset)
                partial = dest + '.part'
                with open(partial, 'wb') as f:
                    f.write(payload)
                os.replace(partial, dest)
            return open(dest, 'rb')
        logger.info('Downloading single-cell data for %s...', dataset_label)
        return BytesIO(self.raw.get_dataset_single_cell_expression(dataset))

    with (resolve() as f, tarfile.open(fileobj=f) as tf,
          tempfile.TemporaryDirectory() as tmpdir):
        logger.info('Extracting TAR file for %s to %s...', dataset_label,
                    tmpdir)
        # NOTE(review): the archive is extracted without an extraction
        # filter; consider tarfile's 'data' filter if the server cannot be
        # fully trusted.
        tf.extractall(tmpdir)
        samples = []
        # Sorted so that the concatenation order is reproducible.
        for sample_dir in sorted(os.listdir(tmpdir)):
            logger.info('Reading MEX data for %s...', sample_dir)
            # Gemma already guarantees uniqueness of identifiers and scanpy
            # cannot deal with numeric gene identifiers when make_unique is
            # True, so we skip that part
            samples.append(scanpy.read_10x_mtx(join(tmpdir, sample_dir),
                                               make_unique=False))
        return scanpy.concat(samples)
1711+
1712+
def get_differential_expression_values(self,
16711713
dataset:Optional[str|int] = None,
16721714
keep_non_specific:bool = False,
16731715
result_sets:Optional[List[str|int]] = None,

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ dependencies = [
1818
'pandas',
1919
'numpy',
2020
'anndata',
21+
'scanpy',
2122
'typing'
2223
]
2324

2425
[dependency-groups]
2526
dev = ["pytest"]
2627

2728
[tool.setuptools.packages]
28-
find = {}
29+
find = {}

tests/test_basic.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,19 @@ def test_auth(monkeypatch):
7070
monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD_CMD', '')
7171
gemmapy.GemmaPy()
7272

73+
def test_get_single_cell_data():
    """Smoke test: fetch one single-cell dataset and print the result."""
    # TODO: use a publicly available dataset
    adata = gemmapy.GemmaPy().get_single_cell_dataset_object(
        'GSE227313', download_dir='.')
    print(adata)
78+
79+
def test_get_genes():
80+
assert len(api.get_genes('BRCA1')) > 0
81+
assert len(api.get_genes(['BRCA1'])) > 0
82+
assert len(api.get_genes(672)) > 0
83+
assert len(api.get_genes([672])) > 0
84+
assert len(api.get_genes([672, 'BRCA1'])) > 0
85+
7386
def test_get_result_sets():
7487
res = api.get_result_sets([200])
7588

0 commit comments

Comments
 (0)