80 changes: 59 additions & 21 deletions .github/workflows/test_and_deploy.yml
@@ -17,9 +17,28 @@ on:
workflow_dispatch:

jobs:
add-assets-to-release:
name: Add Sample Data Asset to Release
if: startsWith(github.ref, 'refs/tags/') # only run on tag pushes (releases)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Create sample_data.zip
run: |
cd src/napari_clusters_plotter/sample_data
zip -r ../../../sample_data.zip . -x "*.git*"
- name: Upload Sample Data Asset
uses: softprops/action-gh-release@v1
with:
files: sample_data.zip
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

test:
name: ${{ matrix.platform }} py${{ matrix.python-version }}
runs-on: ${{ matrix.platform }}
needs: [add-assets-to-release] # Wait for assets to be uploaded
if: always() # Run tests even if no release assets (for PRs)
strategy:
matrix:
platform: [ubuntu-latest, windows-latest, macos-latest] # macos-latest (disabled, see related issue)
@@ -33,7 +52,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

# these libraries enable testing on Qt on linux
# these libraries enable testing on Qt on linux
- uses: tlambert03/setup-qt-libs@v1

# strategy borrowed from vispy for installing opengl libs on windows
@@ -63,28 +82,47 @@ jobs:
- name: Coverage
uses: codecov/codecov-action@v3

deploy:
# this will run when you have tagged a commit,
# and requires that you have put your twine API key in your
# github secrets (see readme for details)
needs: [test]
build:
name: Build distribution πŸ“¦
needs: [test] # Wait for tests to pass
runs-on: ubuntu-latest
if: contains(github.ref, 'tags')

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.x"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -U setuptools setuptools_scm wheel twine build
- name: Build and publish
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: |
git tag
python -m build .
twine upload dist/*
- name: Install pypa/build
run: >-
python3 -m pip install build --user
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v4
with:
name: python-package-distributions
path: dist/

publish-to-pypi:
name: >-
Publish Python 🐍 distribution πŸ“¦ to PyPI
if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
needs:
- test
- build
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/project/napari-clusters-plotter/
permissions:
id-token: write # IMPORTANT: mandatory for trusted publishing

steps:
- name: Download all the dists
uses: actions/download-artifact@v4
with:
name: python-package-distributions
path: dist/
- name: Publish distribution πŸ“¦ to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
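The pooch registry added in _sample_data.py (further down in this diff) pins sample_data.zip to a hard-coded sha256 digest, so that value has to be refreshed whenever this release job uploads a new archive. A minimal sketch for recomputing the digest locally, assuming the zip has already been built the same way the job builds it (the helper name is illustrative, not part of the PR):

```python
# Illustrative helper, not part of this PR: recompute the sha256 digest that the
# pooch registry in _sample_data.py expects for sample_data.zip.
import hashlib


def sha256_digest(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 16), b""):  # 64 KiB chunks
            h.update(chunk)
    return f"sha256:{h.hexdigest()}"


print(sha256_digest("sample_data.zip"))
```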
3 changes: 3 additions & 0 deletions .gitignore
@@ -6,6 +6,9 @@ __pycache__/
# C extensions
*.so

sample_data.zip
data_registry.txt

# Distribution / packaging
.Python
env/
4 changes: 3 additions & 1 deletion MANIFEST.in
@@ -2,7 +2,9 @@ include LICENSE
include README.md
include requirements.txt

recursive-include src/napari-clusters-plotter/sample_data *
# Include necessary plugin files and exclude sample data
recursive-exclude src/napari_clusters_plotter/sample_data *
include src/napari_clusters_plotter/sample_data/data_registry.txt

recursive-exclude * __pycache__
recursive-exclude * *.py[co]
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -41,7 +41,8 @@ dependencies = [
"scikit-image",
"scipy",
"biaplotter>=0.3.1",
"imagecodecs"
"imagecodecs",
"pooch"
]


49 changes: 49 additions & 0 deletions src/napari_clusters_plotter/_create_sample_data_assets.py
@@ -0,0 +1,49 @@
import os
import zipfile
from pathlib import Path


def create_sample_data_zip():
import shutil

sample_data_dir = Path(__file__).parent / "sample_data"
zip_path = Path(__file__).parent / "sample_data.zip"

if os.path.exists(zip_path):
shutil.rmtree(zip_path)

if os.path.exists(sample_data_dir.parent / "data_registry.txt"):
os.remove(sample_data_dir.parent / "data_registry.txt")

with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
for root, _, files in os.walk(sample_data_dir):
for file in files:
file_path = Path(root) / file
arcname = file_path.relative_to(sample_data_dir)
zipf.write(file_path, arcname)

# copy to sample_data folder
dest_path = sample_data_dir.parent / "sample_data" / "sample_data.zip"
os.replace(zip_path, dest_path)


def create_registry_file():
import hashlib

root_dir = Path(__file__).parent / "sample_data" # Update this path
registry = {}
with open(root_dir / "data_registry.txt", "w") as registry_file:
for root, _, files in os.walk(root_dir):
for fn in files:
fp = Path(root) / fn
rel_path = fp.relative_to(root_dir).as_posix()
with open(fp, "rb") as f:
file_hash = hashlib.sha256(f.read()).hexdigest()
registry[str(rel_path)] = f"sha256:{file_hash}"
registry_file.write(f"{rel_path}: sha256:{file_hash}\n")


if __name__ == "__main__":
create_registry_file()
create_sample_data_zip()
create_registry_file()
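The script above regenerates both artifacts from a checkout: create_registry_file() writes data_registry.txt with one "relative/path: sha256:<digest>" line per file under sample_data/, and create_sample_data_zip() packs the folder into sample_data.zip. The registry format matters because load_registry() in _sample_data.py splits on the literal ": sha256:" separator; a small sketch of that round trip (the file name is real, the digest is made up):

```python
# Illustrative sketch, not part of the PR: parse one data_registry.txt line the
# same way load_registry() in _sample_data.py does (split on ": sha256:").
line = "shapes_skeleton/blobs.tif: sha256:3b4f0c2d"  # digest shortened/made up
rel_path, digest = line.split(": sha256:")
print(rel_path, digest)  # -> shapes_skeleton/blobs.tif 3b4f0c2d
```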
142 changes: 98 additions & 44 deletions src/napari_clusters_plotter/_sample_data.py
@@ -1,22 +1,81 @@
import glob
import os
import zipfile
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
import pooch
from skimage import io

from napari_clusters_plotter import __version__

def skan_skeleton() -> List["LayerData"]: # noqa: F821
import pandas as pd
from skimage.io import imread
# parse version
if "dev" in __version__:
from packaging.version import parse

major, minor, patch = parse(__version__).release
version = f"{major}.{minor}.{patch-1}"
else:
version = __version__

DATA_REGISTRY = pooch.create(
path=pooch.os_cache("napari-clusters-plotter"),
base_url=f"https://github.com/biapol/napari-clusters-plotter/releases/download/v{version}/",
registry={
"sample_data.zip": "sha256:d21889252cc439b32dacbfb2d4085057da1fe28e3c35f94fee1487804cfe9615"
},
)


def load_image(fname):
zip_path = DATA_REGISTRY.fetch("sample_data.zip")

# check if has been unzipped before
if not os.path.exists(zip_path.split(".zip")[0]):
with zipfile.ZipFile(zip_path, "r") as z:
z.extractall(zip_path.split(".zip")[0])

fname = os.path.join(zip_path.split(".zip")[0], fname)
image = io.imread(fname)

return image


def load_tabular(fname, **kwargs):
zip_path = DATA_REGISTRY.fetch("sample_data.zip")

# check if has been unzipped before
if not os.path.exists(zip_path.split(".zip")[0]):
with zipfile.ZipFile(zip_path, "r") as z:
z.extractall(zip_path.split(".zip")[0])

fname = os.path.join(zip_path.split(".zip")[0], fname)
data = pd.read_csv(fname, **kwargs)
return data


def load_registry():
zip_path = DATA_REGISTRY.fetch("sample_data.zip")

paths_data = Path(__file__).parent / "sample_data" / "shapes_skeleton"
df_paths = pd.read_csv(
paths_data / Path("all_paths.csv"),
# check if has been unzipped before
if not os.path.exists(zip_path.split(".zip")[0]):
with zipfile.ZipFile(zip_path, "r") as z:
z.extractall(zip_path.split(".zip")[0])

fname = os.path.join(
zip_path.split(".zip")[0], "sample_data/data_registry.txt"
)
df_features = pd.read_csv(
paths_data / Path("skeleton_features.csv"),
index_col="Unnamed: 0", # Adjusted to match the CSV structure
registry = pd.read_csv(fname, sep=": sha256:", header=None)
registry.columns = ["file", "hash"]
return registry


def skan_skeleton() -> List["LayerData"]: # noqa: F821

df_paths = load_tabular("shapes_skeleton/all_paths.csv")
df_features = load_tabular(
"shapes_skeleton/skeleton_features.csv", index_col="Unnamed: 0"
)

# skeleton_id column should be categorical
@@ -49,7 +108,7 @@ def skan_skeleton() -> List["LayerData"]: # noqa: F821
)

layer_blobs = (
imread(paths_data / Path("blobs.tif")),
load_image("shapes_skeleton/blobs.tif"),
{
"name": "binary blobs",
"opacity": 0.5,
@@ -62,17 +121,14 @@ def skan_skeleton() -> List["LayerData"]: # noqa: F821


def tgmm_mini_dataset() -> List["LayerData"]: # noqa: F821
import pandas as pd
from skimage.io import imread

path = Path(__file__).parent / "sample_data" / "tracking_data"
data = pd.read_csv(path / Path("tgmm-mini-tracks-layer-data.csv"))
features = pd.read_csv(
path / Path("tgmm-mini-spot.csv"),
features = load_tabular(
"tracking_data/tgmm-mini-spot.csv",
skiprows=[1, 2],
low_memory=False,
encoding="utf-8",
)
data = load_tabular("tracking_data/tgmm-mini-tracks-layer-data.csv")

categorical_columns = [
"Label",
@@ -82,7 +138,7 @@ def tgmm_mini_dataset() -> List["LayerData"]: # noqa: F821
]
for feature in categorical_columns:
features[feature] = features[feature].astype("category")
tracking_label_image = imread(path / Path("tgmm-mini.tif"))
tracking_label_image = load_image("tracking_data/tgmm-mini.tif")

layer_data_tuple_tracks = (
data,
@@ -108,26 +164,26 @@ def tgmm_mini_dataset() -> List["LayerData"]: # noqa: F821


def bbbc_1_dataset() -> List["LayerData"]: # noqa: F821
import pandas as pd
from skimage import io

# get path of this file
path = Path(__file__).parent / "sample_data" / "BBBC007_v1_images"
# read data registry file
registry = load_registry()

tif_files = glob.glob(
os.path.join(str(path), "**", "*.tif"), recursive=True
)
registry_bbby1 = registry[
registry["file"].str.contains("BBBC007_v1_images")
]
tif_files = registry_bbby1[registry_bbby1["file"].str.endswith(".tif")][
"file"
].to_list()
raw_images = [f for f in tif_files if "labels" not in f]

n_rows = np.ceil(np.sqrt(len(raw_images)))
n_cols = np.ceil(len(raw_images) / n_rows)

layers = []

images = [io.imread(f) for f in raw_images]
labels = [io.imread(f.replace(".tif", "_labels.tif")) for f in raw_images]
images = [load_image(f) for f in raw_images]
labels = [load_image(f.replace(".tif", "_labels.tif")) for f in raw_images]
features = [
pd.read_csv(f.replace(".tif", "_features.csv")) for f in raw_images
load_tabular(f.replace(".tif", "_features.csv")) for f in raw_images
]

max_size = max([image.shape[0] for image in images])
@@ -172,17 +228,16 @@ def bbbc_1_dataset() -> List["LayerData"]: # noqa: F821


def cells3d_curvatures() -> List["LayerData"]: # noqa: F821
import numpy as np
import pandas as pd
from skimage import io

path = Path(__file__).parent / "sample_data" / "cells3d"

# load data
vertices = np.loadtxt(path / "vertices.txt")
faces = np.loadtxt(path / "faces.txt").astype(int)
hks = pd.read_csv(path / "signature.csv")
nuclei = io.imread(path / "nucleus.tif")
vertices = load_tabular(
"cells3d/vertices.txt", sep=" ", header=None
).to_numpy()
faces = (
load_tabular("cells3d/faces.txt", sep=" ", header=None)
.to_numpy()
.astype(int)
)
hks = load_tabular("cells3d/signature.csv")
nuclei = load_image("cells3d/nucleus.tif")

# create layer data tuples
layer_data_surface = (
@@ -208,12 +263,11 @@ def cells3d_curvatures() -> List["LayerData"]: # noqa: F821

def granule_compression_vectors() -> List["LayerData"]: # noqa: F821
import numpy as np
import pandas as pd
from napari.utils import notifications

path = Path(__file__).parent / "sample_data" / "compression_vectors"

features = pd.read_csv(path / "granular_compression_test.csv")
features = load_tabular(
"compression_vectors/granular_compression_test.csv"
)
features["iterations"] = features["iterations"].astype("category")
features["returnStatus"] = features["returnStatus"].astype("category")
features["Label"] = features["Label"].astype("category")
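One design observation on _sample_data.py: load_image, load_tabular, and load_registry each repeat the same fetch-then-extract block around DATA_REGISTRY.fetch("sample_data.zip"). A hedged alternative, assuming pooch's built-in Unzip processor is acceptable here, would let pooch extract and cache the archive once; the helper below is a sketch, not what the PR implements:

```python
# Sketch of a possible consolidation, not part of this PR: pooch.Unzip() extracts
# the archive into the cache on first fetch and returns the extracted member paths.
import os

import pooch


def sample_dir(registry: pooch.Pooch) -> str:
    members = registry.fetch("sample_data.zip", processor=pooch.Unzip())
    # The common parent of the extracted members plays the role of
    # zip_path.split(".zip")[0] in the PR's load_* helpers.
    return os.path.commonpath(members)
```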