Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/3824.feat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `n_obs_aggregated` to the output of `sc.get.aggregate` to show the total number of observations aggregated per group.
4 changes: 4 additions & 0 deletions src/scanpy/get/_aggregated.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ def aggregate( # noqa: PLR0912

dim_df = getattr(adata, axis_name)
categorical, new_label_df = _combine_categories(dim_df, by)

# Add number of obs aggregated into each group
group_sizes = pd.Series(categorical).value_counts().reindex(new_label_df.index)
new_label_df["n_obs_aggregated"] = group_sizes.values
Comment on lines +300 to +301
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
group_sizes = pd.Series(categorical).value_counts().reindex(new_label_df.index)
new_label_df["n_obs_aggregated"] = group_sizes.values
new_label_df["n_obs_aggregated"] = pd.Series(categorical).value_counts().reindex(new_label_df.index)

no?

# Actual computation
layers = _aggregate(
data,
Expand Down
35 changes: 35 additions & 0 deletions tests/test_aggregated.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,38 @@ def test_factors():

res = sc.get.aggregate(adata, by=["a", "b", "c", "d"], func="sum")
np.testing.assert_equal(res.layers["sum"], adata.X)


def test_aggregate_adds_n_obs_aggregated_single_key(pbmc_adata):
result = sc.get.aggregate(pbmc_adata, by="louvain", func="mean")
# Check column exists
assert "n_obs_aggregated" in result.obs
# Counts should be positive
assert (result.obs["n_obs_aggregated"] > 0).all()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to check this, given the check below.

# Total counts should equal original n_obs
assert result.obs["n_obs_aggregated"].sum() == pbmc_adata.n_obs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the wrong check: you'd want to compare against the per-louvain group counts.



def test_aggregate_adds_n_obs_aggregated_multiple_keys(pbmc_adata):
pbmc_adata.obs["percent_mito_binned"] = pd.cut(
pbmc_adata.obs["percent_mito"], bins=5
)
result = sc.get.aggregate(
pbmc_adata, by=["louvain", "percent_mito_binned"], func="mean"
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't make sense to aggregate by percent_mito_binned because it is not a categorical.

assert "n_obs_aggregated" in result.obs
# Still sums back to the total number of obs
assert result.obs["n_obs_aggregated"].sum() == pbmc_adata.n_obs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same on the count check



def test_aggregate_n_obs_aggregated_no_empty_groups(pbmc_adata):
# Force a categorical with unused categories
pbmc_adata.obs["fake_group"] = pd.Categorical(
["A"] * (pbmc_adata.n_obs - 1) + ["B"], categories=["A", "B", "C"]
)
result = sc.get.aggregate(pbmc_adata, by="fake_group", func="mean")
assert "n_obs_aggregated" in result.obs
# Only groups with data should appear
assert set(result.obs["fake_group"]) == {"A", "B"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to check these

# Count check
assert result.obs["n_obs_aggregated"].sum() == pbmc_adata.n_obs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same on the count check

Loading