haniffalab
diff --git a/‎.github/workflows/build-sphinx.yml‎
Lines changed: 30 additions & 0 deletions b/‎.github/workflows/build-sphinx.yml‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎.github/workflows/sphinx-build.yml‎ ‎.github/workflows/deploy-sphinx.yml‎.github/workflows/sphinx-build.yml renamed to .github/workflows/deploy-sphinx.yml
Lines changed: 9 additions & 11 deletions b/‎.github/workflows/sphinx-build.yml‎ ‎.github/workflows/deploy-sphinx.yml‎.github/workflows/sphinx-build.yml renamed to .github/workflows/deploy-sphinx.yml
Lines changed: 9 additions & 11 deletions
diff --git a/‎.github/workflows/docker-builds.yml‎
Lines changed: 42 additions & 0 deletions b/‎.github/workflows/docker-builds.yml‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎.github/workflows/tests-python.yml‎
Lines changed: 6 additions & 5 deletions b/‎.github/workflows/tests-python.yml‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎bin/build_config_multimodal.py‎
Lines changed: 4 additions & 9 deletions b/‎bin/build_config_multimodal.py‎
Lines changed: 4 additions & 9 deletions
diff --git a/‎docs/_sources/index.rst.txt‎
Lines changed: 10 additions & 1 deletion b/‎docs/_sources/index.rst.txt‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎docs/_sources/modules.rst.txt‎
Lines changed: 3 additions & 0 deletions b/‎docs/_sources/modules.rst.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/_sources/multimodal/configuration.rst.txt‎
Lines changed: 137 additions & 0 deletions b/‎docs/_sources/multimodal/configuration.rst.txt‎
Lines changed: 137 additions & 0 deletions
diff --git a/‎docs/_sources/multimodal/overview.rst.txt‎
Lines changed: 31 additions & 0 deletions b/‎docs/_sources/multimodal/overview.rst.txt‎
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,30 @@
+name: build-docs  # builds the Sphinx HTML docs as a check; this workflow has no deploy step
+
+on:
+  push:
+    branches: [dev]
+
+  pull_request:
+    branches: [main, dev]
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4  # v2 targets the retired Node 12 runtime
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5  # v2 targets the retired Node 12 runtime
+        with:
+          python-version: "3.10"  # quoted so YAML does not read it as the float 3.1
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r ./envs/requirements.txt
+          pip install -r ./envs/dev/requirements.txt
+          pip install -r ./envs/build_config/requirements.txt
+      - name: Sphinx Build
+        working-directory: ./sphinx
+        run: |
+          make html
+          touch _build/html/.nojekyll
@@ -1,12 +1,9 @@
-name: docs
+name: deploy-docs
 
 on:
   push:
-    branches: [ main, dev ]
-    tags: [ 'v*' ]
-
-  pull_request:
-    branches: [ main, dev ]
+    branches: [main]
+    tags: ["v*"]
 
 jobs:
   run:
@@ -17,21 +14,22 @@ jobs:
       - name: Set up Python 3.10
         uses: actions/setup-python@v2
         with:
-          python-version: '3.10'
+          python-version: "3.10"
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install -r ./envs/requirements.txt          
-          pip install sphinx sphinx-rtd-theme
+          pip install -r ./envs/dev/requirements.txt
+          pip install -r ./envs/build_config/requirements.txt
       - name: Sphinx Build
         working-directory: ./sphinx
-        run: | 
+        run: |
           make html
           touch _build/html/.nojekyll
       - name: Deploy to GitHub Pages
         uses: JamesIves/github-pages-deploy-action@v4.4.1
         with:
-          branch: dev
+          branch: gh-pages
           folder: sphinx/_build/html
           target-folder: docs
-          clean: true
+          clean: true
@@ -0,0 +1,42 @@
+name: docker-builds  # builds and pushes both Docker images when a v* tag is pushed
+
+on:
+  push:
+    tags: ["v*"]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4  # v2 targets the retired Node 12 runtime
+      - name: Get tag  # NOTE(review): output unused; TAG is derived from GITHUB_REF below
+        uses: oprypin/find-latest-tag@v1
+        with:
+          repository: haniffalab/webatlas-pipeline
+          releases-only: true
+        id: find-latest-tag
+      - name: Set version
+        run: |
+          # TAG=${{ steps.find-latest-tag.outputs.tag }}
+          TAG=${GITHUB_REF#refs/*/}
+          VERSION=${TAG#v}
+          echo "Version: $VERSION"
+          echo "VERSION=$VERSION" >> "$GITHUB_ENV"
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Build Docker (webatlas-pipeline)
+        working-directory: ./envs
+        run: |
+          docker build --platform=linux/amd64 -t "haniffalab/webatlas-pipeline:${VERSION}" -f ./Dockerfile .
+      - name: Build Docker (webatlas-pipeline-build-config)
+        working-directory: ./envs/build_config
+        run: |
+          docker build --platform=linux/amd64 -t "haniffalab/webatlas-pipeline-build-config:${VERSION}" -f ./Dockerfile .
+      - name: Push Docker images
+        run: |
+          docker push "haniffalab/webatlas-pipeline:${VERSION}"
+          docker push "haniffalab/webatlas-pipeline-build-config:${VERSION}"
@@ -2,10 +2,10 @@ name: python-tests
 
 on:
   push:
-    branches: [ main, dev ]
-    
+    branches: [main, dev]
+
   pull_request:
-    branches: [ main, dev ]
+    branches: [main, dev]
 
 jobs:
   run:
@@ -20,7 +20,7 @@ jobs:
       - name: Set up Python 3.10
         uses: actions/setup-python@v2
         with:
-          python-version: '3.10'
+          python-version: "3.10"
       - name: Clone ome-zarr-metadata
         uses: actions/checkout@v2
         with:
@@ -31,10 +31,11 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r ./envs/requirements.txt
+          pip install -r ./envs/dev/requirements.txt
           cd ./ome-zarr-metadata && pre-commit install && pip install -e . && cd ../
       - name: Run tests
         run: python -m pytest --cov=bin tests/test_class.py
         env:
           PYTHONPATH: ./bin
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v3
@@ -38,14 +38,10 @@ def write_json(
     Args:
         project (str, optional): Project name. Defaults to "".
         datasets (dict[str, dict[str]], optional): Dictionary of datasets.
-            Expected structure: { dataset_name: {
-                "file_paths" : [],
-                "images": {"raw": [], "label": []},
-                "options": {},
-                "obs_type": "cell"
-                "is_spatial": True // if has images should be enough
-                }
-            }
+            Expected structure: { dataset_name: { "file_paths" : [],
+            "images": {"raw": [], "label": []},
+            "options": {}, "obs_type": "cell",
+            "is_spatial": True } }
             Defaults to {}.
         extended_features (Union[list[str], str], optional): List of features or
             string of single feature on which the expression matrix was extended
@@ -62,7 +58,6 @@ def write_json(
         outdir (str, optional): Directory in which the config file will be written to.
             Defaults to "./".
     """
-
     config = VitessceConfig(
         "1.0.15",
         name=str(title) if len(title) else str(project),
 
@@ -9,7 +9,7 @@
 .. |DOI| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.7405818.svg
    :target: https://doi.org/10.5281/zenodo.7405818
 
-WebAtlas Pipeline
+WebAtlas pipeline
 =================
 
 This Nextflow pipeline processes spatial and single-cell experiment data for visualisation in `WebAtlas App`_. 
@@ -38,6 +38,15 @@ Indices and tables
 * :ref:`modindex`
 * :ref:`search`
 
+.. toctree::
+   :maxdepth: 2
+   :caption: Multimodal
+  
+   multimodal/overview
+   multimodal/configuration
+   multimodal/run
+   multimodal/visualise
+
 .. toctree::
    :maxdepth: 2
    :hidden:
 
@@ -37,3 +37,6 @@ Modules
 
 .. automodule:: integrate_image
     :members:
+
+.. automodule:: build_config_multimodal
+    :members:
@@ -0,0 +1,137 @@
+.. _multimodal_configuration:
+
+########################
+Multimodal configuration
+########################
+
+After running the main conversion pipeline you can populate the required YAML parameters file to run the multimodal integration pipeline.
+
+.. _multimodal_parameters_file:
+
+***************
+Parameters file
+***************
+
+The parameters file looks like this:
+
+.. code-block:: yaml
+
+    outdir: "/path/to/output/"
+
+    url: http://localhost:3000/
+    project: my_project
+    title: "My Project"
+
+    data:
+      -
+        dataset: scrnaseq
+        obs_type: cell
+        anndata: /path/to/main/output/scrnaseq-anndata.zarr
+        offset: 0
+        is_spatial: false
+        vitessce_options:
+          spatial:
+            xy: obsm/spatial
+          mappings:
+            obsm/X_umap: [0,1]
+          matrix: X
+      -
+        dataset: visium
+        obs_type: spot
+        anndata: /path/to/main/output/visium-anndata.zarr
+        offset: 1000000
+        is_spatial: true
+        raw_image: /path/to/main/output/visium-raw.zarr
+        label_image: /path/to/main/output/visium-label.zarr
+        vitessce_options:
+          spatial:
+            xy: obsm/spatial
+          matrix: X
+
+In contrast to the main conversion pipeline's parameters file, this file includes a single ``project`` to which multiple ``datasets`` belong.
+
+Each ``dataset`` block defines the name of the dataset and paths to the converted data and image files (if any).
+
+Each ``dataset`` also requires a set of ``vitessce_options`` that specify the location of certain data (spatial coordinates, embeddings, expression matrix, etc.) within the AnnData object that is processed/generated.
+This follows the same structure as in the :ref:`main conversion's vitessce_options <vitessce_options>`.
+
+Additionally, each ``dataset`` requires:
+
+* ``obs_type``, the type of observation of the dataset. For example, "cell" or "spot".
+* ``offset``, an integer offset to add to the dataset's IDs so they don't clash with the other datasets.
+* ``is_spatial``, whether the dataset contains spatial information and has associated image files (raw and/or label images).
+
+Given that raw images are only read but not modified, the pipeline does not generate new output from them.
+In order for the output directory (defined by ``outdir``) to contain all necessary files that need to be served for the web application to consume,
+by default, the pipeline copies the raw images to the output directory.
+This process can take a long time depending on the size of the image.
+You may want to manually copy or move the image or serve it from its own directory separate from the rest of the output.
+The default copying can be disabled by setting ``copy_raw: false`` as a project-wide parameter (at the same level as ``outdir``, ``project``, etc).
+For example,
+
+.. code-block:: yaml
+
+    outdir: "/path/to/output/"
+    url: http://localhost:3000/
+    project: my_project
+    title: "My Project"
+    copy_raw: false
+
+
+With additional features
+========================
+
+Using the above example parameters file to run the multimodal integration pipeline will run the reindexing and intersection steps.
+To perform the concatenation of additional features (like celltypes) to visualise them as continuous values, some extra parameters need to be added.
+
+As a project-wide parameter (at the same level as ``outdir``, ``project``, etc.):
+
+* ``extend_feature_name``, the name of the additional feature. For example, "celltype"
+
+And at a ``dataset`` level:
+
+* ``extend_feature``, the location of the additional feature information.
+  This can be either the path to a *cell2location* output file, or the location within the AnnData object where the feature is stored as a categorical within ``obs``.
+  For example, ``/path/to/c2l.h5ad`` containing predicted continuous values, or ``obs/celltype`` containing categoricals.
+
+The full parameters file will then look like this:
+
+.. code-block:: yaml
+
+    outdir: "/path/to/output/"
+
+    url: http://localhost:3000/
+    project: my_project
+    title: "My Project"
+
+    extend_feature_name: celltype
+
+    data:
+      -
+        dataset: scrnaseq
+        obs_type: cell
+        anndata: /path/to/main/output/scrnaseq-anndata.zarr
+        extend_feature: obs/celltype
+        offset: 0
+        is_spatial: false
+        vitessce_options:
+          spatial:
+            xy: obsm/spatial
+          mappings:
+            obsm/X_umap: [0,1]
+          matrix: X
+      -
+        dataset: visium
+        obs_type: spot
+        anndata: /path/to/main/output/visium-anndata.zarr
+        extend_feature: /path/to/c2l.h5ad
+        offset: 1000000
+        is_spatial: true
+        raw_image: /path/to/main/output/visium-raw.zarr
+        label_image: /path/to/main/output/visium-label.zarr
+        vitessce_options:
+          spatial:
+            xy: obsm/spatial
+          matrix: X
+
+With these parameters, the multimodal integration pipeline will concatenate the expression matrix with the additional feature values so both can be queried and visualised across datasets within the same portal.
@@ -0,0 +1,31 @@
+.. _multimodal_overview:
+
+###################
+Multimodal overview
+###################
+
+After the ``main.nf`` pipeline has been successfully run, WebAtlas can optionally process a group of multimodal datasets that 
+share common features. This step will prepare the unified multimodal visualisation for the web app.
+
+The data outputs generated by running the ``main.nf`` conversion pipeline serve as inputs for this multimodal integration pipeline.
+
+*******************************
+Running the multimodal pipeline
+*******************************
+
+Follow the instructions below to run the multimodal pipeline.
+
+1. :ref:`Configure <multimodal_configuration>` the parameters file for the ``multimodal.nf`` pipeline
+2. :ref:`Run <multimodal_run>` the ``multimodal.nf`` pipeline
+3. :ref:`Visualise <multimodal_visualise>` the multimodal data in a web browser
+
+*******************************
+Tasks completed by the pipeline
+*******************************
+
+The multimodal integration pipeline performs several tasks:
+
+1. Reindex each dataset by a user-provided offset so IDs do not clash between modalities.
+2. *Optionally*, concatenate other observation-by-feature matrices or categorical values to the expression matrix to enable their visualisation as continuous values. For example, a celltype prediction matrix and/or celltype categories.
+3. Find the intersection of features between all datasets and subset them to visualise only the intersection (as including features not present in all datasets can produce misleading visualisations).
+   **Note** the features are intersected using their index in the AnnData objects (``var`` table). All datasets must use the same type of data as index for the intersection to be correctly computed. For example, all datasets use names as index, or all datasets use IDs as index.