Skip to content

Commit ff27547

Browse files
authored
Merge pull request #30 from quadbio/feat/naming
Enhanced numerical obs mapping support and method consistency
2 parents 7edb7fe + 6071e84 commit ff27547

File tree

10 files changed

+324
-245
lines changed

10 files changed

+324
-245
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ __pycache__/
1919
/docs/generated/
2020
/docs/_build/
2121
.ipynb_checkpoints/
22+
/docs/notebooks/tutorials/test_adata.ipynb
2223

2324
# datasets and models
2425
*.h5ad

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ With a joint embedding in ``.obsm["X_joint"]`` at hand, the simplest way to use
5353
```Python
5454
from cellmapper import CellMapper
5555

56-
cmap = CellMapper(query, reference).fit(
56+
cmap = CellMapper(query, reference).map(
5757
use_rep="X_joint", obs_keys="celltype", obsm_keys="X_umap", layer_key="X"
5858
)
5959
```

docs/notebooks/tutorials/spatial_mapping.ipynb

Lines changed: 33 additions & 28 deletions
Large diffs are not rendered by default.

docs/notebooks/tutorials/spatial_smoothing.ipynb

Lines changed: 42 additions & 75 deletions
Large diffs are not rendered by default.

src/cellmapper/model/cellmapper.py

Lines changed: 157 additions & 109 deletions
Large diffs are not rendered by default.

src/cellmapper/model/evaluate.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def evaluate_label_transfer(
156156

157157
if pred_postfix is None or conf_postfix is None:
158158
raise ValueError(
159-
"Label transfer has not been performed. Either call transfer_labels() first "
159+
"Label transfer has not been performed. Either call map_obs() first "
160160
"or provide prediction_postfix and confidence_postfix parameters."
161161
)
162162

@@ -219,7 +219,7 @@ def plot_confusion_matrix(
219219
Additional keyword arguments to pass to ConfusionMatrixDisplay.
220220
"""
221221
if self.prediction_postfix is None or self.confidence_postfix is None:
222-
raise ValueError("Label transfer has not been performed. Call transfer_labels() first.")
222+
raise ValueError("Label transfer has not been performed. Call map_obs() first.")
223223

224224
# Extract true and predicted labels
225225
y_true = self.query.obs[label_key].dropna()
@@ -336,7 +336,7 @@ def _get_aligned_expression_arrays(self, layer_key: str) -> tuple[np.ndarray, np
336336
"""
337337
if self.query_imputed is None:
338338
raise ValueError(
339-
"Imputed query data not found. Either run transfer_expression() first or set query_imputed manually."
339+
"Imputed query data not found. Either run map_layers() first or set query_imputed manually."
340340
)
341341
shared_genes = list(self.query_imputed.var_names.intersection(self.query.var_names))
342342
if len(shared_genes) == 0:

tests/conftest.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,15 @@ def adata_pbmc3k(precomputed_leiden):
5959
# PCA
6060
sc.tl.pca(adata, mask_var="highly_variable")
6161

62+
# Compute diffusion pseudotime for testing numerical obs mapping
63+
# First compute neighbors and diffusion map
64+
sc.pp.neighbors(adata, n_neighbors=15, use_rep="X_pca")
65+
sc.tl.diffmap(adata)
66+
67+
# Set root cell for pseudotime computation (use first cell)
68+
adata.uns["iroot"] = 0
69+
sc.tl.dpt(adata)
70+
6271
# Load precomputed leiden clustering
6372
adata.obs["leiden"] = precomputed_leiden.astype("str").astype("category")
6473

tests/test_evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ class TestEvaluate:
88
@pytest.mark.parametrize("method", ["pearson", "spearman", "js", "rmse"])
99
@pytest.mark.parametrize("groupby", ["batch", "modality"])
1010
def test_evaluate_expression_transfer_layers_and_methods(self, cmap, eval_layer, method, groupby):
11-
cmap.transfer_expression(layer_key="X")
11+
cmap.map_layers(key="X")
1212
cmap.evaluate_expression_transfer(layer_key=eval_layer, method=method, groupby=groupby)
1313
metrics = cmap.expression_transfer_metrics
1414
assert metrics["method"] == method

tests/test_query_to_reference_mapping.py

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33
from scipy.sparse import issparse
4+
from scipy.stats import pearsonr
45

56
from cellmapper.model.cellmapper import CellMapper
67

@@ -18,17 +19,17 @@ class TestQueryToReferenceMapping:
1819
"""Tests for query-to-reference mapping functionality in CellMapper."""
1920

2021
def test_label_transfer(self, cmap, expected_label_transfer_metrics):
21-
cmap.transfer_labels(obs_keys="leiden")
22+
cmap.map_obs(key="leiden")
2223
cmap.evaluate_label_transfer(label_key="leiden")
2324
assert_metrics_close(cmap.label_transfer_metrics, expected_label_transfer_metrics)
2425

2526
def test_embedding_transfer(self, cmap):
26-
cmap.transfer_embeddings(obsm_keys="X_pca")
27+
cmap.map_obsm(key="X_pca")
2728
assert "X_pca_pred" in cmap.query.obsm
2829
assert cmap.query.obsm["X_pca_pred"].shape[0] == cmap.query.n_obs
2930

3031
def test_expression_transfer(self, cmap, expected_expression_transfer_metrics):
31-
cmap.transfer_expression(layer_key="X")
32+
cmap.map_layers(key="X")
3233
cmap.evaluate_expression_transfer(layer_key="X", method="pearson")
3334
assert_metrics_close(cmap.expression_transfer_metrics, expected_expression_transfer_metrics)
3435

@@ -41,7 +42,7 @@ def test_compute_mapping_matrix_all_methods(self, cmap, method):
4142

4243
@pytest.mark.parametrize("layer_key", ["X", "counts"])
4344
def test_expression_transfer_layers(self, cmap, layer_key):
44-
cmap.transfer_expression(layer_key=layer_key)
45+
cmap.map_layers(key=layer_key)
4546
assert cmap.query_imputed is not None
4647
assert cmap.query_imputed.X.shape[0] == cmap.query.n_obs
4748

@@ -58,7 +59,7 @@ def test_expression_transfer_layers(self, cmap, layer_key):
5859
],
5960
)
6061
def test_fit_various_combinations(self, cmap, obs_keys, obsm_keys, layer_key):
61-
cmap.fit(obs_keys=obs_keys, obsm_keys=obsm_keys, layer_key=layer_key)
62+
cmap.map(obs_keys=obs_keys, obsm_keys=obsm_keys, layer_key=layer_key)
6263
if obs_keys is not None:
6364
keys = [obs_keys] if isinstance(obs_keys, str) else obs_keys
6465
for key in keys:
@@ -71,11 +72,11 @@ def test_fit_various_combinations(self, cmap, obs_keys, obsm_keys, layer_key):
7172
assert cmap.query_imputed is not None
7273
assert cmap.query_imputed.X.shape[0] == cmap.query.n_obs
7374

74-
def test_transfer_labels_self_mapping(self, query_reference_adata):
75+
def test_map_obs_self_mapping(self, query_reference_adata):
7576
"""Check mapping to self."""
7677
_, reference = query_reference_adata
7778
cm = CellMapper(reference, reference)
78-
cm.fit(
79+
cm.map(
7980
knn_method="sklearn",
8081
mapping_method="jaccard",
8182
obs_keys="leiden",
@@ -174,13 +175,13 @@ def test_query_imputed_invalid_type(self, cmap):
174175
with pytest.raises(TypeError):
175176
cmap.query_imputed = [1, 2, 3]
176177

177-
def test_query_imputed_integration_with_transfer_expression(self, cmap, random_imputed_data):
178-
"""Test that transfer_expression correctly uses the query_imputed property."""
178+
def test_query_imputed_integration_with_map_layers(self, cmap, random_imputed_data):
179+
"""Test that map_layers correctly uses the query_imputed property."""
179180
# First check query_imputed is None
180181
assert cmap.query_imputed is None
181182

182-
# Transfer expression
183-
cmap.transfer_expression(layer_key="X")
183+
# Map expression
184+
cmap.map_layers(key="X")
184185

185186
# Verify query_imputed was set
186187
assert cmap.query_imputed is not None
@@ -255,3 +256,51 @@ def test_compute_neighbors_fallback(self, cmap, n_comps, fallback_representation
255256
assert key_added in cmap.query.obsm
256257
assert cmap.reference.obsm[key_added].shape[1] == n_comps
257258
assert cmap.query.obsm[key_added].shape[1] == n_comps
259+
260+
def test_map_obs_numerical_data_type_detection(self, query_reference_adata):
261+
"""Test that numerical data types are correctly detected in map_obs."""
262+
query, reference = query_reference_adata
263+
264+
# Add some numerical data to reference
265+
reference.obs["numerical_score"] = np.random.rand(reference.n_obs)
266+
reference.obs["integer_score"] = np.random.randint(0, 100, reference.n_obs)
267+
268+
# Create CellMapper and compute mapping matrix
269+
cmap = CellMapper(query=query, reference=reference)
270+
cmap.compute_neighbors(n_neighbors=30, use_rep="X_pca", method="sklearn")
271+
cmap.compute_mapping_matrix(method="gaussian")
272+
273+
# Test float and integer data
274+
for key in ["numerical_score", "integer_score"]:
275+
cmap.map_obs(key=key)
276+
assert f"{key}_pred" in cmap.query.obs
277+
assert cmap.query.obs[f"{key}_pred"].dtype.kind == "f"
278+
279+
def test_map_obs_pseudotime_cross_mapping(self, query_reference_adata):
280+
"""Test mapping pseudotime values in cross-mapping mode - predictions should still correlate strongly with the true values."""
281+
query, reference = query_reference_adata
282+
283+
# Create CellMapper and compute mapping matrix
284+
cmap = CellMapper(query=query, reference=reference)
285+
cmap.compute_neighbors(n_neighbors=30, use_rep="X_pca", method="sklearn")
286+
cmap.compute_mapping_matrix(method="gaussian")
287+
288+
# Map pseudotime
289+
cmap.map_obs(key="dpt_pseudotime")
290+
291+
# Check that pseudotime was mapped
292+
assert "dpt_pseudotime_pred" in cmap.query.obs
293+
assert cmap.query.obs["dpt_pseudotime_pred"].dtype == reference.obs["dpt_pseudotime"].dtype
294+
295+
# Check correlation between actual and predicted pseudotime in query subset
296+
# (Note: query is a subset of the original data, so we can compare)
297+
query_original_pt = query.obs["dpt_pseudotime"]
298+
query_predicted_pt = cmap.query.obs["dpt_pseudotime_pred"]
299+
300+
correlation, _ = pearsonr(query_original_pt, query_predicted_pt)
301+
302+
# Cross-mapping should still yield a very high correlation (> 0.99), though possibly lower than self-mapping
303+
assert correlation > 0.99, f"Cross-mapping pseudotime correlation too low: {correlation}"
304+
305+
# Verify no confidence scores for numerical data
306+
assert "dpt_pseudotime_conf" not in cmap.query.obs

tests/test_self_mapping.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,26 @@ def test_self_mapping_initialization(self, adata_pbmc3k):
1515
assert cm.reference is adata_pbmc3k
1616
assert cm.query is adata_pbmc3k
1717

18-
def test_identity_mapping(self, adata_pbmc3k):
18+
@pytest.mark.parametrize("obs_key", ["leiden", "dpt_pseudotime"])
19+
def test_identity_mapping(self, adata_pbmc3k, obs_key):
1920
"""Test that with n_neighbors=1, self-mapping preserves the original obs values (categorical labels or numerical values) exactly."""
2021
# Initialize with self-mapping
2122
cm = CellMapper(adata_pbmc3k)
22-
cm.fit(
23+
cm.map(
2324
knn_method="sklearn",
2425
mapping_method="jaccard",
25-
obs_keys="leiden",
26+
obs_keys=obs_key,
2627
use_rep="X_pca",
2728
n_neighbors=1,
28-
prediction_postfix="transfer",
29+
prediction_postfix="pred",
2930
)
3031

3132
# With n_neighbors=1, labels should be perfectly preserved
32-
assert "leiden_transfer" in adata_pbmc3k.obs
33-
assert len(adata_pbmc3k.obs["leiden_transfer"]) == len(adata_pbmc3k.obs["leiden"])
34-
# Check that all predicted labels are valid categories
35-
assert set(adata_pbmc3k.obs["leiden_transfer"].cat.categories) <= set(adata_pbmc3k.obs["leiden"].cat.categories)
33+
assert f"{obs_key}_pred" in adata_pbmc3k.obs
34+
assert len(adata_pbmc3k.obs[f"{obs_key}_pred"]) == len(adata_pbmc3k.obs[obs_key])
35+
3636
# Labels should match exactly when n_neighbors=1
37-
assert adata_pbmc3k.obs["leiden_transfer"].equals(adata_pbmc3k.obs["leiden"])
37+
assert adata_pbmc3k.obs[f"{obs_key}_pred"].equals(adata_pbmc3k.obs[obs_key])
3838

3939
def test_all_operations_self_mapping(self, adata_pbmc3k):
4040
"""Test the full pipeline in self-mapping mode."""
@@ -46,16 +46,16 @@ def test_all_operations_self_mapping(self, adata_pbmc3k):
4646
cm.compute_mapping_matrix(method="gaussian")
4747

4848
# Test label transfer
49-
cm.transfer_labels(obs_keys="leiden")
49+
cm.map_obs(key="leiden")
5050
assert "leiden_pred" in cm.query.obs
5151
# With n_neighbors>1, self-mapped labels might not be 100% identical
5252

5353
# Test embedding transfer
54-
cm.transfer_embeddings(obsm_keys="X_pca")
54+
cm.map_obsm(key="X_pca")
5555
assert "X_pca_pred" in cm.query.obsm
5656

5757
# Test expression transfer
58-
cm.transfer_expression(layer_key="X")
58+
cm.map_layers(key="X")
5959
assert cm.query_imputed is not None
6060

6161
# Test evaluation functions
@@ -84,7 +84,7 @@ def test_load_scanpy_distances(self, adata_spatial, n_neighbors):
8484

8585
# Test the full pipeline with precomputed distances
8686
cm.compute_mapping_matrix(method="gaussian")
87-
cm.transfer_labels(obs_keys="leiden")
87+
cm.map_obs(key="leiden")
8888

8989
assert "leiden_pred" in cm.query.obs
9090
assert "leiden_conf" in cm.query.obs
@@ -143,7 +143,7 @@ def test_load_squidpy_distances(self, adata_spatial, squidpy_params):
143143

144144
# Test the mapping pipeline
145145
cm.compute_mapping_matrix(method="gaussian")
146-
cm.transfer_labels(obs_keys="leiden")
146+
cm.map_obs(key="leiden")
147147

148148
assert "leiden_pred" in cm.query.obs
149149
assert "leiden_conf" in cm.query.obs
@@ -180,8 +180,8 @@ def test_load_distances_with_include_self(self, adata_spatial, include_self):
180180
cm_without_self.compute_mapping_matrix(method="gaussian")
181181

182182
# Compute label transfer for both
183-
cm_with_self.transfer_labels(obs_keys="leiden", prediction_postfix="with_self")
184-
cm_without_self.transfer_labels(obs_keys="leiden", prediction_postfix="without_self")
183+
cm_with_self.map_obs(key="leiden", prediction_postfix="with_self")
184+
cm_without_self.map_obs(key="leiden", prediction_postfix="without_self")
185185

186186
# Both should have created prediction columns
187187
assert "leiden_with_self" in adata_spatial.obs
@@ -204,5 +204,5 @@ def test_self_mapping_without_rep(self, adata_pbmc3k):
204204
assert adata_pbmc3k.obsm["X_pca"].shape[1] == 10
205205

206206
# Test rest of pipeline
207-
cm.transfer_labels(obs_keys="leiden")
207+
cm.map_obs(key="leiden")
208208
assert "leiden_pred" in cm.query.obs

0 commit comments

Comments
 (0)