1+ import numpy as np
12import anndata as ad
2- import txsim as tx
3+ import scanpy as sc
4+ from scipy import sparse
5+ from scipy .sparse import issparse
6+ from scanpy ._utils import view_to_actual
7+ from sklearn .utils import sparsefuncs
38
49## VIASH START
510par = {
611 'input_spatial_aggregated_counts' : 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad' ,
712 'input_cell_volumes' : 'resources_test/task_ist_preprocessing/mouse_brain_combined/cell_volumes.h5ad' ,
813 'output' : 'spatial_norm_counts.h5ad' ,
14+
15+ 'target_volume' : None ,
916}
1017meta = {
1118 'name' : 'normalize_by_volume' ,
1421
1522assert par ['input_cell_volumes' ] is not None , 'Volume input is required for this normalization method.'
1623
24+
25+ ### Function definition ###
26+
27+ def normalize_by_volume (
28+ adata : ad .AnnData ,
29+ target_volume : float | None = None ,
30+ volume_key : str = 'volume' ,
31+ key_added : str | None = None ,
32+ layer : str | None = None ,
33+ inplace : bool = True ,
34+ ) -> np .ndarray | sparse .spmatrix | None :
35+ """Normalize counts by cell volume
36+
37+ Parameters
38+ ----------
39+ adata : AnnData
40+ The annotated data matrix of shape `n_obs` x `n_vars`.
41+ Rows correspond to cells and columns to genes.
42+ target_volume
43+ If `None`, after normalization, each observation (cell) has a
44+ volume equal to the median of cell volumes. Note that when providing
45+ a float value it is crucial to know in which spatial units the cell volume
46+ was calculated.
47+ volume_key : str
48+ Name of the field in `adata.obs` where the cell volume (or area) is stored,
49+ by default 'volume'
50+ key_added
51+ Name of the field in `adata.obs` where the normalization factor is
52+ stored.
53+ layer
54+ Layer to normalize instead of `X`. If `None`, `X` is normalized.
55+ inplace
56+ Whether to update `adata` or return dictionary with normalized copies of
57+ `adata.X` and `adata.layers`.
58+
59+
60+ Returns
61+ -------
62+ np.ndarray
63+ If ``inplace=True``, ``adata.X`` is updated with the normalized values.
64+ Otherwise, returns the normalized numpy array or sparse matrix
65+ """
66+
67+ view_to_actual (adata )
68+
69+ X = adata .X if (layer is None ) else adata .layers [layer ]
70+
71+ assert volume_key in adata .obs .columns , f"key { volume_key } not found in adata.obs"
72+ assert not adata .obs [volume_key ].isnull ().any (), "Nan values found in adata.obs[volume_key]. All cells must have valid volumes."
73+ assert (adata .obs [volume_key ] > 0 ).all (), "All cell volumes in adata.obs[volume_key] must be > 0"
74+
75+ if target_volume is None :
76+ target_volume = adata .obs [volume_key ].median ()
77+
78+ volumes = adata .obs [volume_key ].values .copy ()
79+ norm_factors = target_volume / volumes
80+
81+ if issparse (X ):
82+ if X .dtype != norm_factors .dtype :
83+ X = X .astype (np .float64 )
84+ norm_factors = norm_factors .astype (np .float64 )
85+ if inplace :
86+ sparsefuncs .inplace_row_scale (X , norm_factors )
87+ else :
88+ X_norm = X .copy ()
89+ sparsefuncs .inplace_row_scale (X_norm , norm_factors )
90+ else :
91+ if inplace :
92+ np .divide (X , volumes [:,None ], out = X )
93+ else :
94+ X_norm = X / volumes [:, None ]
95+
96+ if key_added :
97+ adata .obs [key_added ] = norm_factors
98+
99+ if not inplace :
100+ return X_norm
101+
102+
103+ ### Main function ###
104+
17105print ('Reading input files' , flush = True )
18- adata = ad .read (par ['input_spatial_aggregated_counts' ])
19- adata_volume = ad .read (par ['input_cell_volumes' ])
106+ adata = ad .read_h5ad (par ['input_spatial_aggregated_counts' ])
107+ adata_volume = ad .read_h5ad (par ['input_cell_volumes' ])
20108assert adata .obs ["cell_id" ].astype (int ).sort_values ().equals (adata_volume .obs ["cell_id" ].astype (int ).sort_values ()), "Cell IDs do not match"
21109adata .obs ["volume" ] = adata_volume .obs .loc [adata .obs ["cell_id" ].astype (str ), "volume" ]
22110
23111print ('Normalizing by volume' , flush = True )
24- # TODO: Add scaling parameter, also as input to the script
25- tx .preprocessing .normalize_by_area (adata , area = "volume" )
26-
27- adata .layers ['normalized' ] = adata .layers ['lognorm' ] #TODO: Check if openproblems "normalized" is understood as log normalized
28- del adata .layers ['norm' ]
29- del adata .layers ['lognorm' ]
30- #del adata.layers['raw'] #TODO: Probably better to add this, otherwise some downstream methods' tests will succeed by chance
112+ adata .layers ['normalized' ] = adata .layers ['counts' ].copy ()
113+ normalize_by_volume (adata , layer = "normalized" , target_volume = par ["target_volume" ])
114+ sc .pp .log1p (adata , layer = "normalized" )
31115
32116print ('Writing output' , flush = True )
33117adata .write (par ['output' ])
0 commit comments