|
# Scale the observed counts element-wise to obtain the resolVI-corrected counts.
# The scaling term is px_rate / (1 + px_rate + mean_poisson), built from the
# 'post_sample_q50' summary of `samples_corr` and the 'post_sample_means' summary
# of `samples` (presumably posterior median / posterior mean -- confirm against
# where these summaries are computed).
px_rate_q50 = samples_corr.loc['post_sample_q50', 'px_rate']
mean_poisson = samples.loc['post_sample_means', 'mean_poisson']
retained_fraction = px_rate_q50 / (1.0 + px_rate_q50 + mean_poisson)

# Convert the product to CSR before storing it as a layer.
adata_sp.layers["corrected_counts"] = adata_sp.layers['counts'].multiply(retained_fraction).tocsr()
82 | 82 |
|
# Re-normalize the corrected counts. TODO: replace this workaround, see the NOTE further down.
#
# Per-row size factors are recovered from the existing layers as
#   sum(counts) / sum(expm1(normalized)),
# which assumes 'normalized' was produced as log1p(counts / size_factor) -- confirm
# against the normalization step that runs earlier in the pipeline.
# NOTE(review): a row whose totals are both zero yields 0/0 = NaN here -- confirm that
# empty rows cannot reach this step.
raw_totals = adata_sp.layers['counts'].sum(axis=1)
unlogged_totals = adata_sp.layers['normalized'].expm1().sum(axis=1)
size_factors = np.array(raw_totals / unlogged_totals)[:, 0]

# Apply the same per-row scaling to the corrected counts and log1p-transform the result.
# NOTE(review): the result is stored as a dense array (.toarray()), whereas the previous
# 'normalized' layer exposed .expm1() as a method (presumably a sparse matrix) -- confirm
# that the switch to dense is intended.
inverse_factors = 1 / size_factors[:, None]
corrected = adata_sp.layers['corrected_counts']
adata_sp.layers["normalized"] = corrected.multiply(inverse_factors).log1p().toarray()

# The corrected counts take over the 'counts' layer; the temporary layer is removed.
adata_sp.layers["counts"] = corrected
del adata_sp.layers['corrected_counts']

# NOTE: This way of normalizing is not ideal. Ideally the corrected counts would be passed
# through the same normalization method that was applied to the original counts, but the
# pipeline runs the normalization step before expression correction.
# One solution would have been to move the resolVI correction after the count aggregation.
# In that case, however, only the unsupervised version of resolVI could be applied, because
# the supervised version requires cell type (ct) annotation, and the tutorials recommend the
# supervised version.
# Possible future solutions (each of them a fair amount of work):
#   - Add a compute step that runs the normalization after expression correction whenever
#     resolVI is used.
#   - Feed the output of the resolVI correction back into the normalization step. This would
#     also re-run ct annotation, and it would re-run the correction step itself, which is
#     problematic and would need a workaround.
#   - Move the resolVI correction after the count aggregation and add a generic cell type
#     annotation step (either as a workflow step or as an annotation inside the resolVI script).
| 98 | + |
# Persist the updated AnnData object to the path given by par['output'].
print('Writing output', flush=True)
output_path = par['output']
adata_sp.write(output_path)
0 commit comments