Skip to content

Commit 066d9b8

Browse files
authored
Merge pull request #167 from JonathanShor/doc_refresh
Update docs
2 parents c0189f6 + 7c238db commit 066d9b8

File tree

9 files changed

+163
-147
lines changed

9 files changed

+163
-147
lines changed

docs/api.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# API
2+
3+
## Classifier
4+
5+
```{eval-rst}
6+
.. currentmodule:: doubletdetection
7+
8+
.. autosummary::
9+
:toctree: generated
10+
11+
BoostClassifier
12+
```
13+
14+
## Plot
15+
16+
```{eval-rst}
17+
.. currentmodule:: doubletdetection
18+
19+
.. autosummary::
20+
:toctree: generated
21+
22+
plot.convergence
23+
plot.threshold
24+
```

docs/api.rst

Lines changed: 0 additions & 24 deletions
This file was deleted.

docs/conf.py

Lines changed: 72 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,68 +23,108 @@
2323
copyright = "2022, Adam Gayoso and Jonathan Shor"
2424
author = "Adam Gayoso and Jonathan Shor"
2525

26+
repository_url = f"https://github.com/JonathanShor/{project}"
27+
28+
29+
30+
templates_path = ["_templates"]
31+
html_context = {
32+
"display_github": True, # Integrate GitHub
33+
"github_user": "JonathanShor", # Username
34+
"github_repo": "DoubletDetection", # Repo name
35+
"github_version": "main", # Version
36+
"conf_py_path": "/docs/", # Path in the checkout to the docs root
37+
}
2638

2739
# -- General configuration ---------------------------------------------------
2840

2941
# Add any Sphinx extension module names here, as strings. They can be
3042
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
3143
# ones.
3244

33-
needs_sphinx = "4.3" # Nicer param docs
34-
3545
extensions = [
36-
"sphinx.ext.autodoc",
46+
"myst_nb",
3747
"sphinx.ext.viewcode",
38-
"myst_parser",
39-
"nbsphinx",
40-
"nbsphinx_link",
41-
"sphinx.ext.mathjax",
42-
"sphinx.ext.napoleon",
43-
"sphinx_autodoc_typehints", # needs to be after napoleon
48+
"sphinx.ext.autodoc",
49+
"sphinx_copybutton",
4450
"sphinx.ext.intersphinx",
4551
"sphinx.ext.autosummary",
52+
"sphinx.ext.napoleon",
53+
"sphinx.ext.extlinks",
54+
"sphinx_autodoc_typehints",
55+
"sphinx.ext.mathjax",
56+
"IPython.sphinxext.ipython_console_highlighting",
57+
"sphinxext.opengraph",
4658
]
4759

48-
# nbsphinx specific settings
49-
nbsphinx_execute = "never"
50-
51-
# Add any paths that contain templates here, relative to this directory.
52-
templates_path = ["_templates"]
60+
autosummary_generate = True
61+
autodoc_member_order = "groupwise"
62+
default_role = "literal"
63+
bibtex_reference_style = "author_year"
64+
napoleon_google_docstring = True
65+
napoleon_numpy_docstring = False
66+
napoleon_include_init_with_doc = False
67+
napoleon_use_rtype = True # having a separate entry generally helps readability
68+
napoleon_use_param = True
69+
myst_heading_anchors = 6 # create anchors for h1-h6
70+
myst_enable_extensions = [
71+
"amsmath",
72+
"colon_fence",
73+
"deflist",
74+
"dollarmath",
75+
"html_image",
76+
"html_admonition",
77+
]
78+
myst_url_schemes = ("http", "https", "mailto")
79+
nb_output_stderr = "remove"
80+
nb_execution_mode = "off"
81+
nb_merge_streams = True
82+
typehints_defaults = "braces"
83+
84+
source_suffix = {
85+
".rst": "restructuredtext",
86+
".ipynb": "myst-nb",
87+
".myst": "myst-nb",
88+
}
5389

54-
source_suffix = ".rst"
90+
intersphinx_mapping = {
91+
"anndata": ("https://anndata.readthedocs.io/en/stable/", None),
92+
"ipython": ("https://ipython.readthedocs.io/en/stable/", None),
93+
"matplotlib": ("https://matplotlib.org/", None),
94+
"numpy": ("https://numpy.org/doc/stable/", None),
95+
"pandas": ("https://pandas.pydata.org/docs/", None),
96+
"python": ("https://docs.python.org/3", None),
97+
"scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
98+
"sklearn": ("https://scikit-learn.org/stable/", None),
99+
"scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
100+
}
55101

56102
# List of patterns, relative to source directory, that match files and
57103
# directories to ignore when looking for source files.
58104
# This pattern also affects html_static_path and html_extra_path.
59105
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
60106

61-
# Generate the API documentation when building
62-
autosummary_generate = True
63-
autodoc_member_order = "bysource"
64-
napoleon_google_docstring = True
65-
napoleon_include_init_with_doc = False
66-
napoleon_use_rtype = True # having a separate entry generally helps readability
67-
napoleon_use_param = True
68-
napoleon_custom_sections = [("Params", "Parameters")]
107+
# extlinks config
108+
extlinks = {
109+
"issue": (f"{repository_url}/issues/%s", "#%s"),
110+
"pr": (f"{repository_url}/pull/%s", "#%s"),
111+
"ghuser": ("https://github.com/%s", "@%s"),
112+
}
113+
114+
69115

70116
# -- Options for HTML output -------------------------------------------------
71117

72118
# The theme to use for HTML and HTML Help pages. See the documentation for
73119
# a list of builtin themes.
74120
#
75-
html_theme = "furo"
121+
html_theme = "sphinx_book_theme"
76122

77123
html_title = "DoubletDetection"
78124

79125
html_theme_options = {
80-
"sidebar_hide_name": False,
81-
"light_css_variables": {
82-
"color-brand-primary": "#003262",
83-
"color-brand-content": "#003262",
84-
"admonition-font-size": "var(--font-size-normal)",
85-
"admonition-title-font-size": "var(--font-size-normal)",
86-
"code-font-size": "var(--font-size--small)",
87-
},
126+
"repository_url": "https://github.com/JonathanShor/DoubletDetection",
127+
"use_repository_button": True,
88128
}
89129

90130
# Add any paths that contain custom static files (such as style sheets) here,

docs/index.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
```{include} ../README.md
2+
3+
```
4+
5+
```{toctree}
6+
:hidden: true
7+
:maxdepth: 1
8+
9+
api
10+
plot
11+
tutorial
12+
```

docs/index.rst

Lines changed: 0 additions & 24 deletions
This file was deleted.

docs/tutorial.nblink

Lines changed: 0 additions & 3 deletions
This file was deleted.

doubletdetection/doubletdetection.py

Lines changed: 45 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -23,64 +23,51 @@ class BoostClassifier:
2323
"""Classifier for doublets in single-cell RNA-seq data.
2424
2525
Parameters:
26-
boost_rate (float, optional): Proportion of cell population size to
27-
produce as synthetic doublets.
28-
n_components (int, optional): Number of principal components used for
29-
clustering.
30-
n_top_var_genes (int, optional): Number of highest variance genes to
31-
use; other genes discarded. Will use all genes when zero.
32-
replace (bool, optional): If False, a cell will be selected as a
33-
synthetic doublet's parent no more than once.
34-
self.clustering_algorithm (str, optional): One of `["louvain", "leiden",
35-
"phenograph"]`. `"louvain"` and `leiden` refer to the scanpy implementations.
36-
clustering_kwargs (dict, optional): Keyword args to pass directly
37-
to clusering algorithm. Note that we change the PhenoGraph 'prune' default to
38-
True. We also set `directed=False` and `resolution=4` for Louvain
39-
and Leiden clustering. You must specifically include these params here
40-
to change them. `random_state` and `key_added` should not be overriden
41-
when clustering algorithm is Louvain or Leiden.
42-
n_iters (int, optional): Number of fit operations from which to collect
43-
p-values. Defualt value is 25.
44-
normalizer ((sp_sparse) -> ndarray): Method to normalize raw_counts.
45-
Defaults to normalize_counts, included in this package. Note: To use
46-
normalize_counts with its pseudocount parameter changed from the
47-
default pseudocount value to some positive float `new_var`, use:
48-
normalizer=lambda counts: doubletdetection.normalize_counts(counts,
49-
pseudocount=new_var)
50-
pseudocount (int, optional): Pseudocount used in normalize_counts.
51-
If `1` is used, and `standard_scaling=False`, the classifier is
52-
much more memory efficient; however, this may result in fewer doublets
53-
detected.
54-
random_state (int, optional): If provided, passed to PCA and used to
55-
seedrandom seed numpy's RNG. NOTE: PhenoGraph does not currently
56-
admit a random seed, and so this will not guarantee identical
57-
results across runs.
58-
verbose (bool, optional): Set to False to silence all normal operation
59-
informational messages. Defaults to True.
60-
standard_scaling (bool, optional): Set to True to enable standard scaling
61-
of normalized count matrix prior to PCA. Recommended when not using
62-
Phenograph. Defaults to False.
63-
n_jobs (int, optional): Number of jobs to use. Speeds up neighbor computation.
26+
boost_rate: Proportion of cell population size to produce as synthetic doublets.
27+
n_components: Number of principal components used for clustering.
28+
n_top_var_genes: Number of highest variance genes to use. Other genes are
29+
discarded. Will use all genes when zero.
30+
replace: If False, a cell will be selected as a synthetic doublet's parent
31+
no more than once.
32+
clustering_algorithm: One of "louvain", "leiden", or "phenograph". "louvain"
33+
and "leiden" refer to the scanpy implementations.
34+
clustering_kwargs: Keyword args to pass directly to clustering algorithm.
35+
Note that PhenoGraph 'prune' default is changed to True. For Louvain and
36+
Leiden clustering, we set `directed=False` and `resolution=4`. Include
37+
these params explicitly to change them. Do not override `random_state`
38+
and `key_added` for Louvain/Leiden.
39+
n_iters: Number of fit operations from which to collect p-values. Default is 25.
40+
normalizer: Method to normalize raw_counts. Defaults to normalize_counts from
41+
this package. To use normalize_counts with a different pseudocount value,
42+
use: `lambda counts: doubletdetection.normalize_counts(counts,
43+
pseudocount=new_value)`
44+
pseudocount: Pseudocount used in normalize_counts. Using 1 with
45+
standard_scaling=False makes the classifier more memory efficient but may
46+
detect fewer doublets.
47+
random_state: Passed to PCA and doublet parent creation. Note: PhenoGraph does not
48+
support random seeds, so identical results aren't guaranteed across runs.
49+
verbose: Set to False to silence informational messages. Defaults to True.
50+
standard_scaling: Enable standard scaling of normalized count matrix prior to
51+
PCA. Recommended when not using Phenograph. Defaults to False.
52+
n_jobs: Number of jobs to use. Speeds up neighbor computation.
6453
6554
Attributes:
66-
all_log_p_values_ (ndarray): Hypergeometric test natural log p-value per
67-
cell for cluster enrichment of synthetic doublets. Use for tresholding.
55+
all_log_p_values_: Hypergeometric test natural log p-value per cell for
56+
cluster enrichment of synthetic doublets. Use for thresholding.
6857
Shape (n_iters, num_cells).
69-
all_scores_ (ndarray): The fraction of a cell's cluster that is
70-
synthetic doublets. Shape (n_iters, num_cells).
71-
communities_ (ndarray): Cluster ID for corresponding cell. Shape
72-
(n_iters, num_cells).
73-
labels_ (ndarray, ndims=1): 0 for singlet, 1 for detected doublet.
74-
parents_ (list of sequences of int): Parent cells' indexes for each
75-
synthetic doublet. A list wrapping the results from each run.
76-
suggested_score_cutoff_ (float): Cutoff used to classify cells when
77-
n_iters == 1 (scores >= cutoff). Not produced when n_iters > 1.
78-
synth_communities_ (sequence of ints): Cluster ID for corresponding
79-
synthetic doublet. Shape (n_iters, num_cells * boost_rate).
80-
top_var_genes_ (ndarray): Indices of the n_top_var_genes used. Not
81-
generated if n_top_var_genes <= 0.
82-
voting_average_ (ndarray): Fraction of iterations each cell is called a
83-
doublet.
58+
all_scores_: The fraction of a cell's cluster that is synthetic doublets.
59+
Shape (n_iters, num_cells).
60+
communities_: Cluster ID for corresponding cell. Shape (n_iters, num_cells).
61+
labels_: 0 for singlet, 1 for detected doublet.
62+
parents_: Parent cells' indexes for each synthetic doublet. A list wrapping
63+
the results from each run.
64+
suggested_score_cutoff_: Cutoff used to classify cells when n_iters == 1
65+
(scores >= cutoff). Not produced when n_iters > 1.
66+
synth_communities_: Cluster ID for corresponding synthetic doublet.
67+
Shape (n_iters, num_cells * boost_rate).
68+
top_var_genes_: Indices of the n_top_var_genes used. Not generated if
69+
n_top_var_genes <= 0.
70+
voting_average_: Fraction of iterations each cell is called a doublet.
8471
"""
8572

8673
def __init__(
@@ -148,7 +135,7 @@ def fit(self, raw_counts: NDArray | sp_sparse.csr_matrix) -> "BoostClassifier":
148135
"""Fits the classifier on raw_counts.
149136
150137
Args:
151-
raw_counts (array-like): Count matrix, oriented cells by genes.
138+
raw_counts: Count matrix, oriented cells by genes.
152139
153140
Sets:
154141
all_scores_, all_log_p_values_, communities_,
@@ -229,9 +216,9 @@ def predict(self, p_thresh: float = 1e-7, voter_thresh: float = 0.9) -> NDArray:
229216
"""Produce doublet calls from fitted classifier
230217
231218
Args:
232-
p_thresh (float, optional): hypergeometric test p-value threshold
219+
p_thresh: hypergeometric test p-value threshold
233220
that determines per iteration doublet calls
234-
voter_thresh (float, optional): fraction of iterations a cell must
221+
voter_thresh: fraction of iterations a cell must
235222
be called a doublet
236223
237224
Sets:

pyproject.toml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,16 @@ dev = [
4848
"leidenalg",
4949
]
5050
docs = [
51-
"sphinx>=4.1,<4.4",
52-
"sphinx-autodoc-typehints",
53-
"nbsphinx",
54-
"nbsphinx-link",
55-
"furo",
56-
"myst-parser",
51+
"sphinx>=4",
52+
"sphinx-book-theme>=1.0",
53+
"myst-nb",
54+
"sphinxcontrib-bibtex>=1.0.0",
55+
"scanpydoc[typehints]>=0.7.4",
56+
"sphinxext-opengraph",
57+
# For notebooks
58+
"ipython",
59+
"ipykernel",
60+
"sphinx-copybutton",
5761
]
5862

5963
[tool.black]

0 commit comments

Comments
 (0)