Skip to content

Commit b93bce7

Browse files
committed
Assemble single-cell data in an AnnData object
1 parent 125de27 commit b93bce7

File tree

2 files changed

+52
-8
lines changed

2 files changed

+52
-8
lines changed

gemmapy/gemmapy_api.py

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@
77
import logging
88
import os
99
import subprocess
10+
import tarfile
11+
import tempfile
1012
import warnings
1113
from getpass import getpass
12-
from io import StringIO
13-
from typing import Optional, List, Callable
14+
from io import StringIO, BytesIO
15+
from os.path import join
16+
from typing import Optional, List, Callable, Any
1417

1518
import anndata as ad
1619
import numpy as np
1720
import pandas as pd
21+
import scanpy
1822
from anndata import AnnData
1923
from pandas import DataFrame
2024

@@ -1667,12 +1671,51 @@ def make_anndata(pack):
16671671
pass
16681672
return out
16691673

1670-
def get_differential_expression_values(self,
1671-
dataset:Optional[str|int] = None,
1672-
keep_non_specific:bool = False,
1673-
result_sets:Optional[List[str|int]] = None,
1674-
readable_contrasts:bool = False,
1675-
**kwargs)->List[DataFrame]:
1674+
def get_single_cell_dataset_object(self, dataset: str | int,
                                   download_dir=None) -> AnnData:
    """
    Download the single-cell expression data of a dataset and assemble it
    into a single AnnData object.

    The data is obtained as a TAR archive, extracted to a temporary
    directory, and every top-level entry of the archive is read as a 10x
    MEX directory with scanpy. The resulting per-sample objects are then
    concatenated.

    :param dataset: Dataset identifier, given either as a string or as an
    integer.
    :param download_dir: Directory where datasets can be downloaded, or else
    the data will be retrieved in-memory. When an archive named
    ``<dataset>.tar`` already exists in that directory it is reused
    instead of being downloaded again.
    :return: An AnnData object combining all the samples found in the
    archive.
    """

    def resolve():
        """Return a binary file-like object over the dataset's TAR data."""
        if download_dir:
            # dataset may be an int; str() avoids a TypeError on
            # concatenation with the extension
            dest = join(download_dir, str(dataset) + '.tar')
            if not os.path.exists(dest):
                logger.info('Downloading single-cell data for %s to %s...',
                            dataset, download_dir)
                # Download before touching the destination and write
                # through a sibling file, so that a failed download or an
                # interrupted write never leaves a truncated archive that
                # would later be mistaken for a valid cached copy.
                payload = self.raw.get_dataset_single_cell_expression(
                    dataset)
                partial = dest + '.part'
                with open(partial, 'wb') as f:
                    f.write(payload)
                os.replace(partial, dest)
            return open(dest, 'rb')
        else:
            logger.info("Downloading single-cell data for %s...",
                        str(dataset))
            return BytesIO(
                self.raw.get_dataset_single_cell_expression(dataset))

    with (resolve() as f, tarfile.open(fileobj=f) as tf,
          tempfile.TemporaryDirectory() as tmpdir):
        logger.info('Extracting TAR file for %s to %s...', str(dataset),
                    tmpdir)
        # NOTE(review): the archive originates from a remote server;
        # consider passing an extraction filter (filter='data') once the
        # minimum supported Python version provides it.
        tf.extractall(tmpdir)
        samples = []
        # Sorted so the order of the concatenated samples does not depend
        # on the arbitrary order returned by the filesystem
        for sample_dir in sorted(os.listdir(tmpdir)):
            logger.info('Reading MEX data for %s...', sample_dir)
            # Gemma already guarantees unicity of cell identifiers and
            # scanpy cannot deal with numeric gene identifiers when
            # make_unique is True, so we skip that part
            samples.append(scanpy.read_10x_mtx(join(tmpdir, sample_dir),
                                               make_unique=False))
        # NOTE(review): axis="var" concatenates along the variables axis;
        # confirm this is intended rather than axis="obs".
        return scanpy.concat(samples, axis="var")
1712+
1713+
def get_differential_expression_values(self,
1714+
dataset: Optional[str | int] = None,
1715+
keep_non_specific: bool = False,
1716+
result_sets: Optional[List[str | int]] = None,
1717+
readable_contrasts: bool = False,
1718+
**kwargs) -> List[DataFrame]:
16761719
"""
16771720
Retrieves the differential expression resultSet(s) associated with the dataset.
16781721
If there is more than one resultSet, use get_result_sets() to see the options

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ install_requires =
1717
pandas
1818
numpy
1919
anndata
20+
scanpy
2021
typing
2122

2223
#[options.packages.find]

0 commit comments

Comments
 (0)