Skip to content

Commit 1b852c3

Browse files
authored
Merge pull request #151 from JonathanShor/dev_4.1
Dev 4.1
2 parents d25393d + 1417437 commit 1b852c3

File tree

6 files changed

+98
-54
lines changed

6 files changed

+98
-54
lines changed

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
strategy:
1717
matrix:
18-
python-version: [3.6, 3.7]
18+
python-version: [3.6, 3.7, 3.8, 3.9]
1919

2020
steps:
2121
- uses: actions/checkout@v2

.pre-commit-config.yaml

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
repos:
2-
- repo: https://gitlab.com/pycqa/flake8
3-
rev: 3.7.7
4-
hooks:
5-
- id: flake8
6-
- repo: https://github.com/ambv/black
7-
rev: stable
8-
hooks:
9-
- id: black
10-
language_version: python3.7
2+
- repo: https://github.com/psf/black
3+
rev: 22.1.0
4+
hooks:
5+
- id: black
6+
- repo: https://github.com/PyCQA/flake8
7+
rev: 4.0.1
8+
hooks:
9+
- id: flake8
10+
- repo: https://github.com/pycqa/isort
11+
rev: 5.10.1
12+
hooks:
13+
- id: isort
14+
name: isort (python)
15+
additional_dependencies: [toml]

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ In `v2.5` we have added a new experimental clustering method (`scanpy`'s Louvain
5656

5757
## Tutorial
5858

59-
See our [jupyter notebook](https://nbviewer.jupyter.org/github/JonathanShor/DoubletDetection/blob/master/tests/notebooks/PBMC_10k_vignette.ipynb) for an example on 10k PBMCs from 10x Genomics.
59+
See our [tutorial](https://doubletdetection.readthedocs.io/en/latest/tutorial.html) for an example on 10k PBMCs from 10x Genomics.
6060

6161
## Obtaining data
6262

doubletdetection/doubletdetection.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,13 @@ class BoostClassifier:
4242
normalizer ((sp_sparse) -> ndarray): Method to normalize raw_counts.
4343
Defaults to normalize_counts, included in this package. Note: To use
4444
normalize_counts with its pseudocount parameter changed from the
45-
default 0.1 value to some positive float `new_var`, use:
45+
default pseudocount value to some positive float `new_var`, use:
4646
normalizer=lambda counts: doubletdetection.normalize_counts(counts,
4747
pseudocount=new_var)
48+
pseudocount (int, optional): Pseudocount used in normalize_counts.
49+
If `1` is used, and `standard_scaling=False`, the classifier is
50+
much more memory efficient; however, this may result in fewer doublets
51+
detected.
4852
random_state (int, optional): If provided, passed to PCA and used to
4953
seed numpy's RNG. NOTE: PhenoGraph does not currently
5054
admit a random seed, and so this will not guarantee identical
@@ -87,6 +91,7 @@ def __init__(
8791
clustering_kwargs=None,
8892
n_iters=10,
8993
normalizer=None,
94+
pseudocount=0.1,
9095
random_state=0,
9196
verbose=False,
9297
standard_scaling=False,
@@ -101,6 +106,7 @@ def __init__(
101106
self.verbose = verbose
102107
self.standard_scaling = standard_scaling
103108
self.n_jobs = n_jobs
109+
self.pseudocount = pseudocount
104110

105111
if self.clustering_algorithm not in ["louvain", "phenograph", "leiden"]:
106112
raise ValueError(
@@ -297,7 +303,12 @@ def _one_fit(self):
297303
normed_synths = self._raw_synthetics.copy()
298304
inplace_csr_row_normalize_l1(normed_synths)
299305
aug_counts = sp_sparse.vstack((self._normed_raw_counts, normed_synths))
300-
aug_counts = np.log((aug_counts * np.median(aug_lib_size)).A + 0.1)
306+
scaled_aug_counts = aug_counts * np.median(aug_lib_size)
307+
if self.pseudocount != 1:
308+
aug_counts = np.log(scaled_aug_counts.A + 0.1)
309+
else:
310+
aug_counts = np.log1p(scaled_aug_counts)
311+
del scaled_aug_counts
301312

302313
aug_counts = anndata.AnnData(aug_counts)
303314
aug_counts.obs["n_counts"] = aug_lib_size
@@ -306,7 +317,14 @@ def _one_fit(self):
306317

307318
if self.verbose:
308319
print("Running PCA...")
309-
sc.tl.pca(aug_counts, n_comps=self.n_components, random_state=self.random_state)
320+
# "auto" solver faster for dense matrices
321+
solver = "arpack" if sp_sparse.issparse(aug_counts.X) else "auto"
322+
sc.tl.pca(
323+
aug_counts,
324+
n_comps=self.n_components,
325+
random_state=self.random_state,
326+
svd_solver=solver,
327+
)
310328
if self.verbose:
311329
print("Clustering augmented data set...\n")
312330
if self.clustering_algorithm == "phenograph":

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ packages = [
2020
{include = "doubletdetection"},
2121
]
2222
readme = "README.md"
23-
version = "4.0"
23+
version = "4.1"
2424

2525
[tool.poetry.dependencies]
2626
anndata = ">=0.6"

tests/notebooks/PBMC_10k_vignette.ipynb

Lines changed: 60 additions & 39 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)