quadbio · Marius1311 · Apr 30, 2025 · Apr 30, 2025 · Apr 30, 2025 · Apr 30, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,12 +11,14 @@ and this project adheres to [Semantic Versioning][].
 ## [Unreleased]
 
 ### Added
-- Included tests for the `check` module, and more tests for the main classes.
+- Included tests for the `check` module, and more tests for the main classes {pr}`15`.
+- Implemented the computation of presence scores, following HNOCA-tools {pr}`16`.
+- Added a `groupby` parameter to expression transfer evaluation {pr}`16`.
 
 ## [v0.1.1]
 
 ### Changed
-- Switched to `vcs`-based versioning.
+- Switched to `vcs`-based versioning {pr}`5`.
 
 ### Added
 - Added PyPI badge.

diff --git a/README.md b/README.md
@@ -21,13 +21,13 @@ If you don't have Python installed, we recommend installing [uv][].
 
 There are two alternative options to install ``cellmapper``:
 
-- **Install the latest release from [PyPI][]**:
+- Install the latest release from [PyPI][]:
 
   ```bash
   pip install cellmapper
   ```
 
-- **Install the latest development version**:
+- Install the latest development version:
 
   ```bash
   pip install git+https://github.com/quadbio/cellmapper.git@main

diff --git a/docs/api.md b/docs/api.md
@@ -1,39 +1,25 @@
-# API
-
-## Preprocessing
+# API Reference
 
 ```{eval-rst}
-.. module:: cellmapper.pp
-.. currentmodule:: cellmapper
-
-.. autosummary::
-    :toctree: generated
+The class :class:`~cellmapper.CellMapper` is the main class that users interact with:
 
-    pp.basic_preproc
-    pp.elaborate_example
-```
-
-## Tools
-
-```{eval-rst}
-.. module:: cellmapper.tl
+.. module:: cellmapper
 .. currentmodule:: cellmapper
 
 .. autosummary::
     :toctree: generated
 
-    tl.basic_tool
+    CellMapper
 ```
 
-## Plotting
-
 ```{eval-rst}
-.. module:: cellmapper.pl
+The following class is more technical: :class:`~cellmapper.Neighbors` is used under the hood for k-NN graph computation.
+
+.. module:: cellmapper
 .. currentmodule:: cellmapper
 
 .. autosummary::
     :toctree: generated
 
-    pl.basic_plot
-    pl.BasicClass
+    Neighbors
 ```
diff --git a/docs/conf.py b/docs/conf.py
@@ -60,6 +60,7 @@
     "IPython.sphinxext.ipython_console_highlighting",
     "sphinxext.opengraph",
     *[p.stem for p in (HERE / "extensions").glob("*.py")],
+    "sphinx.ext.extlinks",
 ]
 
 autosummary_generate = True
@@ -98,6 +99,13 @@
     "numpy": ("https://numpy.org/doc/stable/", None),
 }
 
+# extlinks config
+extlinks = {
+    "issue": (f"{repository_url}/issues/%s", "#%s"),
+    "pr": (f"{repository_url}/pull/%s", "#%s"),
+    "ghuser": ("https://github.com/%s", "@%s"),
+}
+
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.

diff --git a/docs/references.bib b/docs/references.bib
@@ -8,3 +8,63 @@ @article{Virshup_2023
 	title = {The scverse project provides a computational ecosystem for single-cell omics data analysis},
 	journal = {Nature Biotechnology}
 }
+
+@article{cao2022multi,
+  title={Multi-omics single-cell data integration and regulatory inference with graph-linked embedding},
+  author={Cao, Zhi-Jie and Gao, Ge},
+  journal={Nature Biotechnology},
+  volume={40},
+  number={10},
+  pages={1458--1466},
+  year={2022},
+  publisher={Nature Publishing Group US New York},
+  url={https://www.nature.com/articles/s41587-022-01284-4},
+}
+
+@article{van2018recovering,
+  title={Recovering gene interactions from single-cell data using data diffusion},
+  author={Van Dijk, David and Sharma, Roshan and Nainys, Juozas and Yim, Kristina and Kathail, Pooja and Carr, Ambrose J and Burdziak, Cassandra and Moon, Kevin R and Chaffer, Christine L and Pattabiraman, Diwakar and others},
+  journal={Cell},
+  volume={174},
+  number={3},
+  pages={716--729},
+  year={2018},
+  publisher={Elsevier},
+  url={https://www.sciencedirect.com/science/article/pii/S0092867418307244?via%3Dihub},
+}
+
+@article{lotfollahi2022mapping,
+  title={Mapping single-cell data to reference atlases by transfer learning},
+  author={Lotfollahi, Mohammad and Naghipourfar, Mohsen and Luecken, Malte D and Khajavi, Matin and B{\"u}ttner, Maren and Wagenstetter, Marco and Avsec, {\v{Z}}iga and Gayoso, Adam and Yosef, Nir and Interlandi, Marta and others},
+  journal={Nature Biotechnology},
+  volume={40},
+  number={1},
+  pages={121--130},
+  year={2022},
+  publisher={Nature Publishing Group US New York},
+  url={https://www.nature.com/articles/s41587-021-01001-7},
+}
+
+@article{he2024integrated,
+  title={An integrated transcriptomic cell atlas of human neural organoids},
+  author={He, Zhisong and Dony, Leander and Fleck, Jonas Simon and Sza{\l}ata, Artur and Li, Katelyn X and Sli{\v{s}}kovi{\'c}, Irena and Lin, Hsiu-Chuan and Santel, Malgorzata and Atamian, Alexander and Quadrato, Giorgia and others},
+  journal={Nature},
+  volume={635},
+  number={8039},
+  pages={690--698},
+  year={2024},
+  publisher={Nature Publishing Group UK London},
+  url={https://www.nature.com/articles/s41586-024-08172-8},
+}
+
+@article{li2022benchmarking,
+  title={Benchmarking spatial and single-cell transcriptomics integration methods for transcript distribution prediction and cell type deconvolution},
+  author={Li, Bin and Zhang, Wen and Guo, Chuang and Xu, Hao and Li, Longfei and Fang, Minghao and Hu, Yinlei and Zhang, Xinye and Yao, Xinfeng and Tang, Meifang and others},
+  journal={Nature Methods},
+  volume={19},
+  number={6},
+  pages={662--670},
+  year={2022},
+  publisher={Nature Publishing Group US New York},
+  url={https://www.nature.com/articles/s41592-022-01480-9},
+}
diff --git a/src/cellmapper/__init__.py b/src/cellmapper/__init__.py
@@ -1,8 +1,9 @@
 from importlib.metadata import version
 
 from .cellmapper import CellMapper
+from .knn import Neighbors
 from .logging import logger
 
-__all__ = ["logger", "CellMapper"]
+__all__ = ["logger", "CellMapper", "Neighbors"]
 
 __version__ = version("cellmapper")
diff --git a/src/cellmapper/cellmapper.py b/src/cellmapper/cellmapper.py
@@ -186,9 +186,7 @@ def compute_neighbors(
         use_rep
             Data representation based on which to find nearest neighbors. If None, a joint PCA will be computed.
         method
-            Method to use for computing neighbors. "sklearn" and "pynndescent" run on CPU, "rapids" and "faiss" run on GPU.
-            Note that all but "pynndescent" perform exact neighbor search. With GPU acceleration, "faiss" is usually
-            fastest and more memory efficient than "rapids".
+            Method to use for computing neighbors. "sklearn" and "pynndescent" run on CPU, "rapids" and "faiss" run on GPU. Note that all but "pynndescent" perform exact neighbor search. With GPU acceleration, "faiss" is usually fastest and more memory efficient than "rapids".
         metric
             Distance metric to use for nearest neighbors.
         only_yx
@@ -202,13 +200,15 @@ def compute_neighbors(
 
         Returns
         -------
-        Nothing, but updates the following attributes:
-        knn
-            Nearest neighbors object.
-        n_neighbors
-            Number of nearest neighbors.
-        only_yx
-            Whether only yx neighbors were computed.
+        None
+
+        Notes
+        -----
+        Updates the following attributes:
+
+        - ``knn``: Nearest neighbors object.
+        - ``n_neighbors``: Number of nearest neighbors.
+        - ``only_yx``: Whether only yx neighbors were computed.
         """
         self.n_neighbors = n_neighbors
         self.only_yx = only_yx
@@ -237,18 +237,23 @@ def compute_mappping_matrix(
         ----------
         method
             Method to use for computing the mapping matrix. Options include:
-            - "jaccard": Jaccard similarity. Inspired by GLUE: Cao et al., Nature Biotechnology, 2022: https://www.nature.com/articles/s41587-022-01284-4
-            - "gaussian": Gaussian kernel with adaptive bandwith. Loosely inspired by MAGIC: Van Dijk et al., Cell, 2018: https://www.sciencedirect.com/science/article/pii/S0092867418307244?via%3Dihub
-            - "scarches": scArches kernel. Inspired by scArches: Lotfollahi et al., Nature Biotechnology, 2021: https://www.nature.com/articles/s41587-021-01001-7
+
+            - "jaccard": Jaccard similarity. Inspired by GLUE :cite:`cao2022multi`.
+            - "gaussian": Gaussian kernel with adaptive bandwidth. Loosely inspired by MAGIC :cite:`van2018recovering`.
+            - "scarches": scArches kernel. Inspired by scArches :cite:`lotfollahi2022mapping`.
             - "inverse_distance": Inverse distance kernel.
             - "random": Random kernel, useful for testing.
-            - "hnoca": HNOCA kernel. Inspired by HNOCA-tools: He et al., Nature 2024: https://www.nature.com/articles/s41586-024-08172-8
+            - "hnoca": HNOCA kernel. Inspired by HNOCA-tools :cite:`he2024integrated`.
 
         Returns
         -------
-        Nothing, but updates the following attributes:
-        mapping_matrix
-            Mapping matrix for label transfer.
+        None
+
+        Notes
+        -----
+        Updates the following attributes:
+
+        - ``mapping_matrix``: Mapping matrix for label transfer.
         """
         if self.knn is None or self.n_neighbors is None:
             raise ValueError("Neighbors have not been computed. Call compute_neighbors() first.")
@@ -277,24 +282,26 @@ def transfer_labels(
         self, obs_keys: str | list[str], prediction_postfix: str = "pred", confidence_postfix: str = "conf"
     ) -> None:
         """
-        Transfer discrete labels from reference dataset to query dataset for one or more keys
+        Transfer discrete labels from reference dataset to query dataset for one or more keys.
 
         Parameters
         ----------
         obs_keys
             One or more keys in ``ref.obs`` to be transferred into ``query.obs`` (must be discrete).
         prediction_postfix
-            New ``query.obs`` key added for the transferred labels,
-            by default ``{obs_key}_pred`` for each obs_key.
+            New ``query.obs`` key added for the transferred labels, by default ``{obs_key}_pred`` for each obs_key.
         confidence_postfix
-            New ``query.obs`` key added for the transferred label confidence,
-            by default ``{obs_key}_conf`` for each obs_key.
+            New ``query.obs`` key added for the transferred label confidence, by default ``{obs_key}_conf`` for each obs_key.
 
         Returns
         -------
-        Nothing, but updates the following attributes:
-        query.obs
-            Contains the transferred labels and their confidence scores.
+        None
+
+        Notes
+        -----
+        Updates the following attributes:
+
+        - ``query.obs``: Contains the transferred labels and their confidence scores.
         """
         if self.mapping_matrix is None:
             raise ValueError("Mapping matrix has not been computed. Call compute_mapping_matrix() first.")
@@ -352,9 +359,13 @@ def transfer_embeddings(self, obsm_keys: str | list[str], prediction_postfix: st
 
         Returns
         -------
-        Nothing, but updates the following attributes:
-        query.obsm
-            Contains the transferred embeddings.
+        None
+
+        Notes
+        -----
+        Updates the following attributes:
+
+        - ``query.obsm``: Contains the transferred embeddings.
         """
         if self.mapping_matrix is None:
             raise ValueError("Mapping matrix has not been computed. Call compute_mapping_matrix() first.")
@@ -386,7 +397,11 @@ def transfer_expression(self, layer_key: str) -> None:
 
         Returns
         -------
-        Nothing, but creates/updates self.query_imputed with the transferred data in .X.
+        None
+
+        Notes
+        -----
+        Creates/updates ``self.query_imputed`` with the transferred data in ``.X``.
         The new AnnData object will have the same cells as the query, but the features (genes) of the reference.
         """
         if self.mapping_matrix is None:

diff --git a/src/cellmapper/evaluate.py b/src/cellmapper/evaluate.py
@@ -86,14 +86,18 @@ def evaluate_label_transfer(
             Key in .obs storing ground-truth cell type annotations.
         confidence_cutoff
             Minimum confidence score required to include a cell in the evaluation.
-        zero_divisions
-            How to handle zero divisions in sklearn metrics comptuation.
+        zero_division
+            How to handle zero divisions in sklearn metrics computation.
 
         Returns
         -------
-        Nothing, but updates the following attributes:
-        label_transfer_metrics
-            Dictionary containing accuracy, precision, recall, F1 scores, and excluded fraction.
+        None
+
+        Notes
+        -----
+        Updates the following attributes:
+
+        - ``label_transfer_metrics``: Dictionary containing accuracy, precision, recall, F1 scores, and excluded fraction.
         """
         if self.prediction_postfix is None or self.confidence_postfix is None:
             raise ValueError("Label transfer has not been performed. Call transfer_labels() first.")
@@ -180,7 +184,7 @@ def evaluate_expression_transfer(
         """
         Evaluate the agreement between imputed and original expression in the query dataset, optionally per group.
 
-        These metrics are inspired by Li et al., Nature Methods 2022 (https://www.nature.com/articles/s41592-022-01480-9).
+        These metrics are inspired by :cite:`li2022benchmarking`.
 
         Parameters
         ----------
@@ -193,13 +197,15 @@ def evaluate_expression_transfer(
 
         Returns
         -------
-        Nothing, but updates the following attributes:
-        expression_transfer_metrics
-            Dictionary containing the average metric and number of genes used for the evaluation.
-        query.var[f"metric_{method}"]
-            Per-gene metric values (overall, across all cells).
-        query.varm[f"metric_{method}"]
-            Per-gene, per-group metric values (if groupby is provided).
+        None
+
+        Notes
+        -----
+        Updates the following attributes:
+
+        - ``expression_transfer_metrics``: Dictionary containing the average metric and number of genes used for the evaluation.
+        - ``query.var[metric_name]``: Per-gene metric values (overall, across all cells).
+        - ``query.varm[metric_name]``: Per-gene, per-group metric values (if groupby is provided).
         """
         imputed_x, original_x, shared_genes = self._get_aligned_expression_arrays(layer_key)
 
@@ -329,7 +335,7 @@ def estimate_presence_score(
         """
         Estimate raw presence scores for each reference cell based on query-to-reference connectivities.
 
-        Adapted from the HNOCA-tools package: https://github.com/devsystemslab/HNOCA-tools
+        Adapted from the HNOCA-tools package :cite:`he2024integrated`.
 
         Parameters
         ----------