Merge pull request #131 from JonathanShor/dev-v2.5.2

JonathanShor · web-flow · commit 7d324f8ab259 · 2019-08-25T16:51:41.000-04:00
Fixes #129 by ensuring proper dependencies in setup.py. Fixes #130.
diff --git a/README.md b/README.md
@@ -1,22 +1,30 @@
 # DoubletDetection
+
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2678042.svg)](https://doi.org/10.5281/zenodo.2678042)
 [![Documentation Status](https://readthedocs.org/projects/doubletdetection/badge/?version=latest)](https://doubletdetection.readthedocs.io/en/latest/?badge=latest)
 
-
 DoubletDetection is a Python3 package to detect doublets (technical errors) in single-cell RNA-seq count matrices.
 
+## Installing DoubletDetection
 
-To install DoubletDetection:
-
-```
+```bash
 git clone https://github.com/JonathanShor/DoubletDetection.git
 cd DoubletDetection
 pip3 install .
 ```
 
-To run basic doublet classification:
+If you are using `pipenv` to manage your virtual environment, it may struggle to install from `setup.py` because of our custom PhenoGraph requirement.
+If so, try the following in the cloned repo:
 
+```bash
+pipenv run pip3 install .
 ```
+
+## Running DoubletDetection
+
+To run basic doublet classification:
+
+```Python
 import doubletdetection
 clf = doubletdetection.BoostClassifier()
 # raw_counts is a cells by genes count matrix
@@ -27,6 +35,7 @@ labels = clf.fit(raw_counts).predict()
 - `labels` is a 1-dimensional numpy ndarray with the value 1 representing a detected doublet, 0 a singlet, and `np.nan` an ambiguous cell.
 
 The classifier works best when
+
 - There are several cell types present in the data
 - It is applied individually to each run in an aggregated count matrix
 
@@ -35,10 +44,11 @@ In `v2.5` we have added a new experimental clustering method (`scanpy`'s Louvain
 See our [jupyter notebook](https://nbviewer.jupyter.org/github/JonathanShor/DoubletDetection/blob/master/tests/notebooks/PBMC_8k_vignette.ipynb) for an example on 8k PBMCs from 10x.
 
 ## Obtaining data
-Data can be downloaded from the [10x website](https://support.10xgenomics.com/single-cell/datasets).
 
+Data can be downloaded from the [10x website](https://support.10xgenomics.com/single-cell/datasets).
 
 ## Citations
+
 bioRxiv submission and journal publication expected in the coming months. Please use the following for now:
 
 Gayoso, Adam, & Shor, Jonathan. (2018, July 17). DoubletDetection (Version v2.4). Zenodo. http://doi.org/10.5281/zenodo.2678042
diff --git a/docs/conf.py b/docs/conf.py
@@ -23,7 +23,7 @@
 author = "Adam Gayoso and Jonathan Shor"
 
 # The full version, including alpha/beta/rc tags
-release = "2.5.0"
+release = "2.5.2"
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/doubletdetection/doubletdetection.py b/doubletdetection/doubletdetection.py
@@ -3,18 +3,18 @@
 import collections
 import warnings
 
+import anndata
 import numpy as np
-from sklearn.utils import check_array
-from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
-from scipy.io import mmread
-from scipy.stats import hypergeom
+import phenograph
 import scipy.sparse as sp_sparse
-from scipy.sparse import csr_matrix
 import tables
 import scanpy as sc
-import anndata
+from scipy.io import mmread
+from scipy.sparse import csr_matrix
+from scipy.stats import hypergeom
+from sklearn.utils import check_array
+from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l1
 from tqdm.auto import tqdm
-import phenograph
 
 
 def load_10x_h5(file, genome):
@@ -346,14 +346,17 @@ def _one_fit(self):
         sc.tl.pca(aug_counts, n_comps=self.n_components, random_state=self.random_state)
         if self.verbose:
             print("Clustering augmented data set...\n")
-        sc.pp.neighbors(
-            aug_counts, random_state=self.random_state, method="umap", n_neighbors=10
-        )
         if self.use_phenograph:
             fullcommunities, _, _ = phenograph.cluster(
                 aug_counts.obsm["X_pca"], **self.phenograph_parameters
             )
         else:
+            sc.pp.neighbors(
+                aug_counts,
+                random_state=self.random_state,
+                method="umap",
+                n_neighbors=10,
+            )
             sc.tl.louvain(
                 aug_counts, random_state=self.random_state, resolution=4, directed=False
             )
diff --git a/doubletdetection/plot.py b/doubletdetection/plot.py
@@ -1,12 +1,11 @@
-from sklearn.decomposition import PCA
-import umap
-
 import os
 import warnings
-import numpy as np
-from sklearn.utils import check_array
 
 import matplotlib
+import numpy as np
+import umap
+from sklearn.decomposition import PCA
+from sklearn.utils import check_array
 
 try:
     os.environ["DISPLAY"]
diff --git a/setup.py b/setup.py
@@ -16,7 +16,7 @@
 
 setup(
     name="doubletdetection",
-    version="2.5.0",
+    version="2.5.2",
     description="Method to detect and enable removal of doublets from single-cell RNA-sequencing "
     "data",
     url="https://github.com/JonathanShor/DoubletDetection",
@@ -31,7 +31,11 @@
         "scikit-learn",
         "tables>=3.4.2",
         "umap-learn>=0.3.7",
-        "matplotlib>=2.2.2",
+        "matplotlib>=3.1",
         "phenograph @ https://api.github.com/repos/JonathanShor/PhenoGraph/tarball/v1.6",
+        "scanpy>=1.4.4",
+        "louvain",
+        "tqdm",
+        "anndata",
     ],
 )