Merge pull request #130 from benjeffery/inital-docs

jeromekelleher · web-flow · commit 65ee479e7143 · 2024-04-19T15:30:02.000+01:00
Add initial docs system
diff --git a/docs/Makefile b/docs/Makefile
@@ -0,0 +1,32 @@
+# Makefile for the bio2zarr documentation, which is built by ./build.sh.
+
+# Need to set PYTHONPATH so that we pick up the local bio2zarr.
+# $(CURDIR) is make's absolute working directory, so no shell call is needed.
+PYPATH := $(CURDIR)/../
+B2Z_VERSION := $(shell PYTHONPATH="${PYPATH}" \
+   python3 -c 'import bio2zarr; print(bio2zarr.__version__.split("+")[0])')
+
+BUILDDIR := _build
+
+# None of these targets name a file they create, so declare them phony;
+# otherwise a stray file called e.g. "clean" would turn the target into a no-op.
+.PHONY: dev dist clean
+
+# Default target: build the docs against the local source tree.
+dev:
+	PYTHONPATH="${PYPATH}" ./build.sh
+
+# Release build: substitute the package version for the __BIO2ZARR_VERSION__
+# placeholder in _config.yml, then build.
+# NOTE(review): no doxygen/ directory is added alongside this Makefile, so the
+# doxygen step is skipped when that directory is absent instead of aborting
+# the whole target. Delete the step if bio2zarr has no Doxygen docs.
+dist:
+	@echo Building distribution for bio2zarr version ${B2Z_VERSION}
+	if [ -d doxygen ]; then cd doxygen && doxygen; fi
+	sed -i -e 's/__BIO2ZARR_VERSION__/${B2Z_VERSION}/g' _config.yml
+	PYTHONPATH="${PYPATH}" ./build.sh
+
+# Remove the build output directory.
+clean:
+	rm -fR $(BUILDDIR)
diff --git a/docs/_config.yml b/docs/_config.yml
@@ -0,0 +1,36 @@
+# Book settings
+# Learn more at https://jupyterbook.org/customize/config.html
+
+title: bio2zarr Documentation
+author: sgkit developers
+logo: logo.png
+
+# Force re-execution of notebooks on each build.
+# See https://jupyterbook.org/content/execute.html
+execute:
+  execute_notebooks: force
+
+# Define the name of the latex output file for PDF builds
+latex:
+  latex_documents:
+    targetname: bio2zarr.tex
+
+# Add a bibtex file so that we can create citations
+bibtex_bibfiles:
+  - references.bib  # currently a placeholder with no entries
+
+# Information about where the book exists on the web
+repository:
+  url: https://github.com/sgkit-dev/bio2zarr  # Online location of your book
+  path_to_book: docs  # Optional path to your book, relative to the repository root
+  branch: main  # Which branch of the repository should be used when creating links (optional)
+
+# Add GitHub buttons to your book
+# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository
+html:
+  use_issues_button: true
+  use_repository_button: true
+
+sphinx:
+  extra_extensions:
+    - sphinx_click.ext  # provides the click directive used in cli.md
diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -0,0 +1,4 @@
+format: jb-book
+root: intro  # intro.md
+chapters:
+- file: cli  # cli.md
diff --git a/docs/build.sh b/docs/build.sh
@@ -0,0 +1,20 @@
+#/bin/bash
+
+# Jupyter-build doesn't have an option to automatically show the 
+# saved reports, which makes it difficult to debug the reasons for 
+# build failures in CI. This is a simple wrapper to handle that.
+
+REPORTDIR=_build/html/reports
+
+jupyter-book build -Wn --keep-going .
+RETVAL=$?
+if [ $RETVAL -ne 0 ]; then
+    if [ -e $REPORTDIR ]; then
+      echo "Error occured; showing saved reports"
+      cat $REPORTDIR/*
+    fi
+else
+    # Clear out any old reports
+    rm -f $REPORTDIR/*
+fi
+exit $RETVAL
diff --git a/docs/cli.md b/docs/cli.md
@@ -0,0 +1,11 @@
+# Command Line Interface
+
+```{eval-rst}
+.. click:: bio2zarr.cli:vcf2zarr
+   :prog: vcf2zarr
+   :show-nested:
+
+.. click:: bio2zarr.cli:plink2zarr
+   :prog: plink2zarr
+   :show-nested:
+```
diff --git a/docs/intro.md b/docs/intro.md
@@ -0,0 +1,76 @@
+# bio2zarr Documentation
+
+`bio2zarr` efficiently converts common bioinformatics formats to
+[Zarr](https://zarr.readthedocs.io/en/stable/) format. It initially supports converting
+VCF to the [sgkit vcf-zarr specification](https://github.com/pystatgen/vcf-zarr-spec/).
+
+`bio2zarr` is in early alpha development; contributions, feedback and issues are welcome
+at the [GitHub repository](https://github.com/sgkit-dev/bio2zarr).
+
+## Installation
+`bio2zarr` can be installed from PyPI using pip:
+
+```bash
+$ python3 -m pip install bio2zarr
+```
+
+This will install the programs ``vcf2zarr``, ``plink2zarr`` and ``vcf_partition``
+into your local Python path. You may need to update your $PATH to call the 
+executables directly.
+
+Alternatively, calling 
+```
+$ python3 -m bio2zarr vcf2zarr <args>
+```
+is equivalent to 
+
+```
+$ vcf2zarr <args>
+```
+and will always work.
+
+## Basic vcf2zarr usage
+For modest VCF files (up to a few GB), a single command can be used to convert a VCF file
+(or set of VCF files) to Zarr:
+
+```bash
+$ vcf2zarr convert <VCF1> <VCF2> ... <VCFN> <zarr>
+```
+
+For larger files a multi-step process is recommended. 
+
+
+First, convert the VCF into the intermediate format:
+
+```bash
+$ vcf2zarr explode tests/data/vcf/sample.vcf.gz tmp/sample.exploded
+```
+
+Then, (optionally) inspect this representation to get a feel for your dataset:
+```bash
+$ vcf2zarr inspect tmp/sample.exploded
+```
+
+Then, (optionally) generate a conversion schema to describe the corresponding
+Zarr arrays:
+
+```bash
+$ vcf2zarr mkschema tmp/sample.exploded > sample.schema.json
+```
+
+View and edit the schema, deleting any columns you don't want, or tweaking 
+dtypes and compression settings to your taste.
+
+Finally, encode to Zarr:
+```bash
+$ vcf2zarr encode tmp/sample.exploded tmp/sample.zarr -s sample.schema.json
+```
+
+Use the ``-p, --worker-processes`` argument to control the number of workers used
+in the ``explode`` and ``encode`` phases.
+
+
+
+
+```{tableofcontents}
+```
diff --git a/docs/logo.png b/docs/logo.png
diff --git a/docs/references.bib b/docs/references.bib
@@ -0,0 +1,3 @@
+---
+---
+
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -0,0 +1,2 @@
+jupyter-book
+sphinx-click