Merge pull request #28 from srivarra/feature/marimo-dataset

srivarra · web-flow · commit 11bf38cd0de4 · 2025-01-20T19:09:36.000-08:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### <!-- 0 --> 🏗️ GitHub Actions
+
+- Merge pull request #25 from srivarra/feature/type_fixes by [@srivarra](https://github.com/srivarra) in [#25](https://github.com/srivarra/annsel/pull/25)
+
+### <!-- 3 --> 📝 Documentation
+
+- Merge pull request #24 from srivarra/docs/changelog-template-add-gh-account by [@srivarra](https://github.com/srivarra) in [#24](https://github.com/srivarra/annsel/pull/24)
+
 ### <!-- 4 --> 🧪 Dependencies
 
 - Merge pull request #23 from srivarra/pre-commit-ci-update-config by [@srivarra](https://github.com/srivarra) in [#23](https://github.com/srivarra/annsel/pull/23)
@@ -15,7 +23,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### <!-- 5 --> 🌀 Miscellaneous
 
-- 📝 Updated changelog template
+- Merge pull request #27 from srivarra/ci/revert-arm-runner by [@srivarra](https://github.com/srivarra) in [#27](https://github.com/srivarra/annsel/pull/27)
+- 💚 One day I'll get CI right by [@srivarra](https://github.com/srivarra)
+- Merge pull request #26 from srivarra/pre-commit-ci-update-config by [@srivarra](https://github.com/srivarra) in [#26](https://github.com/srivarra/annsel/pull/26)
+- [pre-commit.ci] pre-commit autoupdate by [@pre-commit-ci[bot]](https://github.com/pre-commit-ci[bot])
+- 💚 Testing arm based ubuntu ci by [@srivarra](https://github.com/srivarra)
+- 🏷️ Update IntoExpr by [@srivarra](https://github.com/srivarra)
+- ⚗️ Marimo testing by [@srivarra](https://github.com/srivarra)
+- [pre-commit.ci] auto fixes from pre-commit.com hooks by [@pre-commit-ci[bot]](https://github.com/pre-commit-ci[bot])
+- Added github label commit parser by [@srivarra](https://github.com/srivarra)
+- 📝 Added github pr label commit parser by [@srivarra](https://github.com/srivarra)
+- 📝 Updated changelog template by [@srivarra](https://github.com/srivarra)
 - [pre-commit.ci] auto fixes from pre-commit.com hooks by [@pre-commit-ci[bot]](https://github.com/pre-commit-ci[bot])
 - [pre-commit.ci] pre-commit autoupdate by [@pre-commit-ci[bot]](https://github.com/pre-commit-ci[bot])
 - [pre-commit.ci] pre-commit autoupdate by [@pre-commit-ci[bot]](https://github.com/pre-commit-ci[bot])
diff --git a/docs/api/datasets.md b/docs/api/datasets.md
@@ -15,4 +15,5 @@
     :toctree: ../generated
 
     leukemic_bone_marrow_dataset
+    marimo_dataset
 ```
diff --git a/src/annsel/datasets/__init__.py b/src/annsel/datasets/__init__.py
@@ -1,3 +1,3 @@
-from .basic import leukemic_bone_marrow_dataset
+from .basic import leukemic_bone_marrow_dataset, marimo_dataset
 
-__all__ = ["leukemic_bone_marrow_dataset"]
+__all__ = ["leukemic_bone_marrow_dataset", "marimo_dataset"]
diff --git a/src/annsel/datasets/basic.py b/src/annsel/datasets/basic.py
@@ -119,3 +119,49 @@ def leukemic_bone_marrow_dataset(
     )
     adata.strings_to_categoricals()
     return adata
+
+
+def marimo_dataset() -> ad.AnnData:
+    """Generate a small synthetic in-memory dataset for Marimo, where loading a dataset from a URL is not possible.
+
+    Returns
+    -------
+    A 100 x 100 AnnData object with categorical ``obs``/``var`` columns and a ``log1p`` layer.
+    """
+    import numpy as np
+    import pandas as pd
+
+    rng = np.random.default_rng(42)  # fixed seed; the draws below are order-dependent, so reordering them changes the data
+
+    X = rng.random((100, 100)) * 10  # 100 cells x 100 genes, uniform floats in [0, 10)
+    log1p_X = np.log1p(X)  # log(1 + X), stored below as the "log1p" layer
+
+    obs = pd.DataFrame(  # per-cell categorical annotations, one row per cell_<i>
+        data={
+            "cell_type": pd.Categorical(
+                rng.choice(a=["A", "B", "C"], p=[0.3, 0.4, 0.3], size=100),  # p = sampling probability per label
+            ),
+            "batch": pd.Categorical(
+                rng.choice(a=["1", "2", "3"], p=[0.3, 0.4, 0.3], size=100),  # batch labels are strings, not ints
+            ),
+            "development_stage": pd.Categorical(
+                rng.choice(a=["embryonic", "adult"], p=[0.3, 0.7], size=100),
+            ),
+        },
+        index=pd.Index([f"cell_{i}" for i in range(100)], name="index"),
+    )
+
+    var = pd.DataFrame(  # per-gene categorical annotation, one row per gene_<i>
+        data={
+            "feature_type": pd.Categorical(
+                rng.choice(a=["protein", "rna"], p=[0.3, 0.7], size=100),
+            ),
+        },
+        index=pd.Index([f"gene_{i}" for i in range(100)], name="index"),
+    )
+
+    adata = ad.AnnData(X=X, obs=obs, var=var, layers={"log1p": log1p_X})  # `ad` is defined outside this hunk (presumably `import anndata as ad`)
+    return adata
+
+    # NOTE(review): this spot held a commented-out variant that built the AnnData without the
+    # "log1p" layer; it sat after the return above (unreachable) and can be deleted.
diff --git a/tests/datasets/test_basic.py b/tests/datasets/test_basic.py
@@ -0,0 +1,7 @@
+from annsel.datasets import marimo_dataset
+
+
+def test_marimo_dataset():  # smoke test: the synthetic dataset builds and has the expected shape
+    adata = marimo_dataset()
+    assert adata.n_obs == 100  # number of observations (rows) in the returned AnnData
+    assert adata.n_vars == 100  # number of variables (columns) in the returned AnnData
diff --git a/uv.lock b/uv.lock