jkmckenna
diff --git a/docs/source/api/datasets.md b/docs/source/api/datasets.md
Lines changed: 16 additions & 1 deletion
diff --git a/docs/source/api/informatics.md b/docs/source/api/informatics.md
Lines changed: 15 additions & 0 deletions
diff --git a/docs/source/api/preprocessing.md b/docs/source/api/preprocessing.md
Lines changed: 15 additions & 0 deletions
diff --git a/docs/source/api/tools.md b/docs/source/api/tools.md
Lines changed: 16 additions & 1 deletion
diff --git a/src/smftools/datasets/datasets.py b/src/smftools/datasets/datasets.py
Lines changed: 26 additions & 17 deletions
diff --git a/src/smftools/informatics/converted_BAM_to_adata.py b/src/smftools/informatics/converted_BAM_to_adata.py
Lines changed: 31 additions & 30 deletions
@@ -6,4 +6,19 @@
 
 ```{eval-rst}
 .. currentmodule:: smftools
-```
+```
+
+```{eval-rst}
+.. autosummary::
+   :toctree: generated/datasets
+   :recursive:
+
+   smftools.datasets
+```
+
+```{eval-rst}
+.. automodule:: smftools.datasets
+   :members:
+   :undoc-members:
+   :show-inheritance:
+```
@@ -15,6 +15,21 @@
 
 Processes raw sequencing data to load an adata object.
 
+```{eval-rst}
+.. autosummary::
+   :toctree: generated/informatics
+   :recursive:
+
+   smftools.informatics
+```
+
+```{eval-rst}
+.. automodule:: smftools.informatics
+   :members:
+   :undoc-members:
+   :show-inheritance:
+```
+
 
 ### Diagram of final steps of Direct SMF workflow
 ```{image} ../_static/modkit_extract_to_adata.png
 
@@ -12,3 +12,18 @@
 ```{eval-rst}
 .. currentmodule:: smftools
 ```
+
+```{eval-rst}
+.. autosummary::
+   :toctree: generated/preprocessing
+   :recursive:
+
+   smftools.preprocessing
+```
+
+```{eval-rst}
+.. automodule:: smftools.preprocessing
+   :members:
+   :undoc-members:
+   :show-inheritance:
+```
@@ -6,4 +6,19 @@
 
 ```{eval-rst}
 .. currentmodule:: smftools
-```
+```
+
+```{eval-rst}
+.. autosummary::
+   :toctree: generated/tools
+   :recursive:
+
+   smftools.tools
+```
+
+```{eval-rst}
+.. automodule:: smftools.tools
+   :members:
+   :undoc-members:
+   :show-inheritance:
+```
@@ -1,33 +1,42 @@
-## datasets
+"""Dataset helpers for bundled SMF datasets."""
 
+from __future__ import annotations
 
-def import_HERE():
-    """
-    Imports HERE for loading datasets
-    """
-    from pathlib import Path
+from pathlib import Path
+from typing import TYPE_CHECKING
 
-    HERE = Path(__file__).parent
-    return HERE
+if TYPE_CHECKING:
+    import anndata as ad
 
 
-def dCas9_kinetics():
+def import_HERE() -> Path:
+    """Resolve the local dataset directory.
+
+    Returns:
+        Path: Path to the datasets directory.
     """
-    in vitro Hia5 dCas9 kinetics SMF dataset. Nanopore HAC m6A modcalls.
+    return Path(__file__).parent
+
+
+def dCas9_kinetics() -> "ad.AnnData":
+    """Load the in vitro Hia5 dCas9 kinetics SMF dataset.
+
+    Returns:
+        anndata.AnnData: Annotated dataset with Nanopore HAC m6A modcalls.
     """
     import anndata as ad
 
-    HERE = import_HERE()
-    filepath = HERE / "dCas9_m6A_invitro_kinetics.h5ad.gz"
+    filepath = import_HERE() / "dCas9_m6A_invitro_kinetics.h5ad.gz"
     return ad.read_h5ad(filepath)
 
 
-def Kissiov_and_McKenna_2025():
-    """
-    F1 Hybrid M.CviPI natural killer cell SMF. Nanopore canonical calls of NEB EMseq converted SMF gDNA.
+def Kissiov_and_McKenna_2025() -> "ad.AnnData":
+    """Load the F1 Hybrid M.CviPI natural killer cell SMF dataset.
+
+    Returns:
+        anndata.AnnData: Annotated dataset with canonical calls of NEB EMseq converted SMF gDNA.
     """
     import anndata as ad
 
-    HERE = import_HERE()
-    filepath = HERE / "F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz"
+    filepath = import_HERE() / "F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz"
     return ad.read_h5ad(filepath)
@@ -27,38 +27,39 @@
 
 
 def converted_BAM_to_adata(
-    converted_FASTA,
-    split_dir,
-    output_dir,
-    input_already_demuxed,
-    mapping_threshold,
-    experiment_name,
-    conversions,
-    bam_suffix,
-    device="cpu",
-    num_threads=8,
-    deaminase_footprinting=False,
-    delete_intermediates=True,
-    double_barcoded_path=None,
-):
-    """
-    Converts BAM files into an AnnData object by binarizing modified base identities.
-
-    Parameters:
-        converted_FASTA (Path): Path to the converted FASTA reference.
-        split_dir (Path): Directory containing converted BAM files.
-        output_dir (Path): Directory of the output dir
-        input_already_demuxed (bool): Whether input reads were originally demuxed
-        mapping_threshold (float): Minimum fraction of aligned reads required for inclusion.
-        experiment_name (str): Name for the output AnnData object.
-        conversions (list): List of modification types (e.g., ['unconverted', '5mC', '6mA']).
-        bam_suffix (str): File suffix for BAM files.
-        num_threads (int): Number of parallel processing threads.
-        deaminase_footprinting (bool): Whether the footprinting was done with a direct deamination chemistry.
-        double_barcoded_path (Path): Path to dorado demux summary file of double ended barcodes
+    converted_FASTA: str | Path,
+    split_dir: Path,
+    output_dir: Path,
+    input_already_demuxed: bool,
+    mapping_threshold: float,
+    experiment_name: str,
+    conversions: list[str],
+    bam_suffix: str,
+    device: str | torch.device = "cpu",
+    num_threads: int = 8,
+    deaminase_footprinting: bool = False,
+    delete_intermediates: bool = True,
+    double_barcoded_path: Path | None = None,
+) -> tuple[ad.AnnData | None, Path]:
+    """Convert BAM files into an AnnData object by binarizing modified base identities.
+
+    Args:
+        converted_FASTA: Path to the converted FASTA reference.
+        split_dir: Directory containing converted BAM files.
+        output_dir: Output directory for intermediate and final files.
+        input_already_demuxed: Whether input reads were originally demultiplexed.
+        mapping_threshold: Minimum fraction of aligned reads required for inclusion.
+        experiment_name: Name for the output AnnData object.
+        conversions: List of modification types (e.g., ``["unconverted", "5mC", "6mA"]``).
+        bam_suffix: File suffix for BAM files.
+        device: Torch device or device string.
+        num_threads: Number of parallel processing threads.
+        deaminase_footprinting: Whether the footprinting used direct deamination chemistry.
+        delete_intermediates: Whether to remove intermediate files after processing.
+        double_barcoded_path: Path to dorado demux summary file of double-ended barcodes.
 
     Returns:
-        str: Path to the final AnnData object.
+        tuple[anndata.AnnData | None, Path]: The AnnData object (if generated) and its path.
     """
     if torch.cuda.is_available():
         device = torch.device("cuda")