Skip to content

Commit baef21d

Browse files
committed
fix: raise ValueError when combat batch has fewer than 2 cells
Previously, `sc.pp.combat` would silently produce NaN values when a batch contained only 1 cell, because within-batch variance cannot be estimated from a single observation. This adds input validation to raise a clear error before computation begins. Closes #1175
1 parent ee7cd17 commit baef21d

File tree

3 files changed

+31
-0
lines changed

3 files changed

+31
-0
lines changed

docs/release-notes/1175.fix.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{func}`scanpy.pp.combat` now raises a {class}`ValueError` when a batch contains fewer than 2 cells, instead of silently producing NaN values in the corrected data {smaller}`L Zhang`

src/scanpy/preprocessing/_combat.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,22 @@ def combat( # noqa: PLR0915
206206
batch_info = model.groupby(key, observed=True).indices.values()
207207
n_batch = len(batch_info)
208208
n_batches = np.array([len(v) for v in batch_info])
209+
210+
# check for batches with fewer than 2 cells
211+
small_batches = [
212+
batch
213+
for batch, size in zip(
214+
model.groupby(key, observed=True).indices, n_batches, strict=True
215+
)
216+
if size < 2
217+
]
218+
if small_batches:
219+
msg = (
220+
f"Batches {small_batches!r} have fewer than 2 cells. "
221+
"ComBat requires at least 2 cells per batch to estimate "
222+
"within-batch variance. Filter these batches before running combat."
223+
)
224+
raise ValueError(msg)
209225
n_array = float(sum(n_batches))
210226

211227
# standardize across genes using a pooled variance estimator

tests/test_combat.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,20 @@ def test_combat_obs_names():
7575
assert_equal(a, b)
7676

7777

78+
def test_combat_single_cell_batch():
79+
"""Test that combat raises an error when a batch has fewer than 2 cells.
80+
81+
Regression test for https://github.com/scverse/scanpy/issues/1175
82+
"""
83+
adata = sc.datasets.blobs()
84+
# Create a batch where one category has only 1 cell
85+
batch = pd.Categorical(["single"] + ["other"] * (adata.n_obs - 1))
86+
adata.obs["batch"] = batch
87+
88+
with pytest.raises(ValueError, match="fewer than 2 cells"):
89+
sc.pp.combat(adata, key="batch")
90+
91+
7892
def test_silhouette():
7993
# this test checks whether combat can align data from several gaussians
8094
# it checks this by computing the silhouette coefficient in a pca embedding

0 commit comments

Comments
 (0)