tskit-dev
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/Makefile‎
Lines changed: 16 additions & 0 deletions b/‎docs/Makefile‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎docs/_config.yml‎
Lines changed: 69 additions & 0 deletions b/‎docs/_config.yml‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎docs/_toc.yml‎
Lines changed: 7 additions & 0 deletions b/‎docs/_toc.yml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎docs/api.md‎
Lines changed: 79 additions & 0 deletions b/‎docs/api.md‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎docs/build.sh‎
Lines changed: 21 additions & 0 deletions b/‎docs/build.sh‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎docs/cli.rst‎
Lines changed: 38 additions & 0 deletions b/‎docs/cli.rst‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎docs/intro.md‎
Lines changed: 110 additions & 0 deletions b/‎docs/intro.md‎
Lines changed: 110 additions & 0 deletions
diff --git a/‎docs/sc2ts.png‎
495 KB b/‎docs/sc2ts.png‎
495 KB
diff --git a/‎pyproject.toml‎
Lines changed: 9 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 9 additions & 0 deletions
@@ -67,7 +67,7 @@ alignments and metadata stored in
 
 Resources:
 
-- See this [notebook](https://github.com/jeromekelleher/sc2ts-paper/blob/main/notebooks/example_data_processing.ipynb)
+- See this [notebook](https://github.com/tskit-dev/sc2ts-paper/blob/main/notebooks/example_data_processing.ipynb)
 for an example in which we access the data variant-by-variant and
 which explains the low-level data encoding
 - See the [VCF Zarr publication](https://doi.org/10.1093/gigascience/giaf049)
 
@@ -0,0 +1,16 @@
+# Need to set PYTHONPATH so that we pick up the local sc2ts. CURDIR (not
+# PWD) is used so that "make -C docs" still resolves the repository root.
+PYPATH := $(CURDIR)/..
+SC2TS_VERSION:=$(shell PYTHONPATH=${PYPATH} \
+   python3 -c 'import sc2ts; print(sc2ts.__version__.split("+")[0])')
+.PHONY: dev dist clean
+dev:  # build the docs against the local sc2ts
+	PYTHONPATH=${PYPATH} ./build.sh
+
+dist:  # stamp the version into _config.yml (edited in place), then build
+	@echo Building distribution for sc2ts version ${SC2TS_VERSION}
+	sed -i s/__SC2TS_VERSION__/${SC2TS_VERSION}/g _config.yml
+	PYTHONPATH=${PYPATH} ./build.sh
+
+clean:  # remove the _build output tree
+	rm -fR _build
@@ -0,0 +1,69 @@
+# Book settings
+# Learn more at https://jupyterbook.org/customize/config.html
+
+title: sc2ts manual
+author: sc2ts developers
+logo: sc2ts.png
+copyright: "2024"
+only_build_toc_files: true
+
+execute:
+  execute_notebooks: cache
+
+launch_buttons:
+  binderhub_url: ""
+
+repository:
+  url: https://github.com/tskit-dev/sc2ts
+  branch: main
+  path_to_book: docs  # this book's sources live under docs/ in the repository
+
+html:
+  favicon: sc2ts.png
+  use_issues_button: true
+  use_repository_button: true
+  use_edit_page_button: true
+
+sphinx:
+  extra_extensions:
+    - sphinx_copybutton
+    - sphinx.ext.autodoc
+    - sphinx.ext.autosummary
+    - sphinx.ext.todo
+    - sphinx.ext.viewcode
+    - sphinx.ext.intersphinx
+    - sphinx_issues
+    - sphinxarg.ext
+    - IPython.sphinxext.ipython_console_highlighting
+    - sphinx_click.ext  # supplies the ".. click::" directive used in cli.rst
+
+  config:
+    html_theme: sphinx_book_theme
+    html_theme_options:
+      navigation_with_keys: false
+      pygments_dark_style: monokai
+      logo:
+        text: "Version __SC2TS_VERSION__"  # placeholder; "make dist" substitutes the real version in place via sed
+
+    myst_enable_extensions:
+      - colon_fence
+      - deflist
+      - substitution
+
+    issues_github_path: tskit-dev/sc2ts
+    todo_include_todos: true
+
+    intersphinx_mapping:
+      python: ["https://docs.python.org/3/", null]
+      tskit: ["https://tskit.dev/tskit/docs/stable", null]
+      tutorials: ["https://tskit.dev/tutorials/", null]
+      numpy: ["https://numpy.org/doc/stable/", null]
+      pandas: ["https://pandas.pydata.org/docs/", null]
+
+    nitpicky: true
+
+    autodoc_member_order: bysource
+    autodoc_typehints: none
+
+    myst_substitutions:
+      min_python_version: "3.10"
@@ -0,0 +1,7 @@
+format: jb-book
+root: intro  # intro.md
+parts:
+  - caption: Interfaces
+    chapters:
+      - file: cli  # cli.rst
+      - file: api  # api.md
@@ -0,0 +1,79 @@
+# Python API
+
+This page documents the public Python API exposed by ``sc2ts``.
+Inference is driven via the command line interface (see the
+{ref}`CLI documentation <sc2ts_sec_cli>`); the functions and classes
+listed here are intended for working with tree sequences and datasets
+that have already been generated.
+
+The reference documentation is concise and exhaustive; for higher level
+discussion and worked examples, see the project README and example
+notebooks.
+
+```{eval-rst}
+.. currentmodule:: sc2ts
+```
+
+## ARG analysis
+
+```{eval-rst}
+.. autosummary::
+   node_data
+   mutation_data
+```
+
+```{eval-rst}
+.. autofunction:: node_data
+
+.. autofunction:: mutation_data
+```
+
+## Dataset access
+
+```{eval-rst}
+.. autosummary::
+   Dataset
+   decode_alignment
+   mask_ambiguous
+   mask_flanking_deletions
+```
+
+```{eval-rst}
+.. autoclass:: Dataset
+   :members:
+
+.. autofunction:: decode_alignment
+
+.. autofunction:: mask_ambiguous
+
+.. autofunction:: mask_flanking_deletions
+```
+
+## Core constants and helpers
+
+```{eval-rst}
+.. autosummary::
+   REFERENCE_STRAIN
+   REFERENCE_DATE
+   REFERENCE_GENBANK
+   REFERENCE_SEQUENCE_LENGTH
+   IUPAC_ALLELES
+   decode_flags
+   flags_summary
+```
+
+```{eval-rst}
+.. autodata:: REFERENCE_STRAIN
+
+.. autodata:: REFERENCE_DATE
+
+.. autodata:: REFERENCE_GENBANK
+
+.. autodata:: REFERENCE_SEQUENCE_LENGTH
+
+.. autodata:: IUPAC_ALLELES
+
+.. autofunction:: decode_flags
+
+.. autofunction:: flags_summary
+```
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# jupyter-book doesn't have an option to automatically show the
+# saved reports, which makes it difficult to debug the reasons for
+# build failures in CI. This wrapper prints them when the build fails.
+
+REPORTDIR=_build/html/reports
+
+jupyter-book build  .
+RETVAL=$?  # keep the build's status; it becomes this script's exit status
+if [ $RETVAL -ne 0 ]; then
+    if [ -e "$REPORTDIR" ]; then
+      echo "Error occured; showing saved reports"
+      cat "$REPORTDIR"/*
+    fi
+else
+    # Clear out any old reports
+    rm -f "$REPORTDIR"/*
+fi
+exit $RETVAL
+
@@ -0,0 +1,38 @@
+.. _sc2ts_sec_cli:
+
+Command line interface
+======================
+
+The ``sc2ts`` package provides a command line interface for running
+inference and working with sc2ts datasets. After installation, the
+``sc2ts`` entry point should be available::
+
+    $ sc2ts --help
+
+You can also invoke the CLI via the module::
+
+    $ python -m sc2ts --help
+
+Order of high-level commands
+----------------------------
+
+In a typical end-to-end workflow, the main subcommands are used in the
+following order:
+
+1. ``import-alignments`` and ``import-metadata`` to build a VCF Zarr
+   dataset from raw alignments and metadata.
+2. ``infer`` to run primary inference over the dataset and produce a
+   series of tree sequence files and a match database.
+3. ``postprocess`` to apply housekeeping steps and incorporate exact
+   matches, outputting a cleaned ARG.
+4. ``minimise-metadata`` to generate an analysis-ready ARG with compact
+   metadata suitable for use with the Python analysis APIs.
+
+Below we list all subcommands and options provided by the CLI. This
+output is generated directly from the Click definitions in
+``sc2ts.cli`` using the ``sphinx-click`` extension, and so stays in
+sync with the implementation.
+
+.. click:: sc2ts.cli:cli
+   :prog: sc2ts
+   :nested: full
@@ -0,0 +1,110 @@
+# sc2ts
+
+`sc2ts` (SARS-CoV-2 to tree sequence, pronounced "scoots") provides tools
+to infer and analyse tskit ancestral recombination graphs (ARGs) for SARS-CoV-2
+at pandemic scale.
+It consists of:
+
+1. A CLI-driven method to infer ARGs from SARS-CoV-2 data.
+2. A lightweight wrapper around the {mod}`tskit` Python APIs, specialised
+   for the output of sc2ts and enabling efficient node metadata access.
+3. A lightweight wrapper around {mod}`zarr` for convenient access to the
+   Viridian dataset (alignments and metadata) in VCF Zarr format.
+
+The underlying methods are described in the sc2ts pre-print:
+<https://www.biorxiv.org/content/10.1101/2023.06.08.544212v2>.
+
+Most users will run sc2ts via the command line interface,
+which drives inference and postprocessing steps (see the
+{ref}`CLI documentation <sc2ts_sec_cli>`). The Python API is intended for
+working with tree sequences and datasets produced by sc2ts (see the
+{doc}`Python API reference <api>`).
+
+For an overview and examples, see the project README and associated
+notebooks in the repository root.
+
+## Installation
+
+Install sc2ts from PyPI:
+
+```sh
+python -m pip install sc2ts
+```
+
+This installs the minimal requirements for the analysis and dataset APIs.
+To run inference from the command line, install the optional inference
+dependencies:
+
+```sh
+python -m pip install 'sc2ts[inference]'
+```
+
+## Quick start: ARG analysis
+
+To compute summary dataframes for nodes and mutations in an inferred ARG,
+you can load an sc2ts tree sequence and call the analysis helpers. For
+example, download the sc2ts paper ARG from Zenodo:
+
+```sh
+curl -O https://zenodo.org/records/17558489/files/sc2ts_viridian_v1.2.trees.tsz
+```
+
+and then:
+
+```python
+import sc2ts
+import tszip
+
+ts = tszip.load("sc2ts_viridian_v1.2.trees.tsz")
+df_node = sc2ts.node_data(ts)
+df_mutation = sc2ts.mutation_data(ts)
+```
+
+See the {doc}`Python API reference <api>` for full details of these
+functions.
+
+## Quick start: CLI inference
+
+To run inference locally using the example Viridian dataset and config:
+
+1. Install the inference extras (if you have not already):
+
+   ```sh
+   python -m pip install 'sc2ts[inference]'
+   ```
+
+2. Download the Viridian dataset in VCF Zarr format:
+
+   ```sh
+   curl -O https://zenodo.org/records/16314739/files/viridian_mafft_2024-10-14_v1.vcz.zip
+   ```
+
+3. Run primary inference using the CLI and the example config in this repo:
+
+   ```sh
+   python -m sc2ts infer example_config.toml --stop=2020-02-02
+   ```
+
+   This will produce a series of `.ts` files and a match database in the
+   output directory specified by the config (see the README for details).
+
+4. Postprocess and generate an analysis-ready ARG:
+
+   ```sh
+   python -m sc2ts postprocess -vv \
+       --match-db example_inference/ex1.matches.db \
+       example_inference/ex1/ex1_2020-02-01.ts \
+       example_inference/ex1_2020-02-01_pp.ts
+
+   python -m sc2ts minimise-metadata \
+       -m strain sample_id \
+       -m Viridian_pangolin pango \
+       example_inference/ex1_2020-02-01_pp.ts \
+       example_inference/ex1_2020-02-01_pp_mm.ts
+   ```
+
+   The file `example_inference/ex1_2020-02-01_pp_mm.ts` can then be used
+   with the Python analysis APIs shown above.
+
+See the {ref}`CLI documentation <sc2ts_sec_cli>` for a complete listing of
+subcommands and options.
@@ -43,6 +43,15 @@ debug = [
   "matplotlib",
   "IPython",
 ]
+docs = [
+  "jupyter-book==1.0.4.post1",
+  "sphinx-book-theme",
+  "sphinx-copybutton",
+  "sphinx-click",
+  "sphinx-argparse==0.5.2",
+  "sphinx-issues==5.0.1",
+  "IPython",
+]
 
 [build-system]
 requires = [