Skip to content

Commit c6c1827

Browse files
committed
Revert "write cosine to zarr"
This reverts commit d3e376f.
1 parent c41ebc7 commit c6c1827

File tree

7 files changed: 16 additions and 177 deletions.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,4 +22,3 @@ pyo3 = { version = "0.21", features = ["extension-module"] }
2222
pyo3-polars = { version = "0.18", features = ["dtype-categorical"] }
2323
rayon = { version = "1.8" }
2424
sprs = { version = "= 0.11.1", features = ["serde"] }
25-
zarrs = { version = "0.16.4", features = ["ndarray", "gzip"] }

sainsc/_utils_rust.pyi

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
from pathlib import Path
21
from typing import Self
32

43
import numpy as np
@@ -43,12 +42,10 @@ def coordinate_as_string(
4342
def cosinef32_and_celltypei8(
4443
counts: GridCounts,
4544
genes: list[str],
46-
celltypes: list[str],
4745
signatures: NDArray[np.float32],
4846
kernel: NDArray[np.float32],
4947
*,
5048
log: bool = False,
51-
zarr_path: Path | None = None,
5249
chunk_size: tuple[int, int] = (500, 500),
5350
n_threads: int | None = None,
5451
) -> tuple[NDArray[np.float32], NDArray[np.float32], NDArray[np.int8]]:
@@ -60,12 +57,10 @@ def cosinef32_and_celltypei8(
6057
def cosinef32_and_celltypei16(
6158
counts: GridCounts,
6259
genes: list[str],
63-
celltypes: list[str],
6460
signatures: NDArray[np.float32],
6561
kernel: NDArray[np.float32],
6662
*,
6763
log: bool = False,
68-
zarr_path: Path | None = None,
6964
chunk_size: tuple[int, int] = (500, 500),
7065
n_threads: int | None = None,
7166
) -> tuple[NDArray[np.float32], NDArray[np.float32], NDArray[np.int16]]:

sainsc/lazykde/_LazyKDE.py

Lines changed: 1 addition & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
from collections.abc import Iterable
2-
from pathlib import Path
32
from typing import TYPE_CHECKING, Any, Self, TypeVar
43

54
import matplotlib.pyplot as plt
@@ -18,7 +17,7 @@
1817
from scipy.sparse import coo_array, csc_array, csr_array
1918
from skimage.feature import peak_local_max
2019

21-
from .._typealias import _Cmap, _Csx, _CsxArray, _Local_Max, _PathLike, _RangeTuple2D
20+
from .._typealias import _Cmap, _Csx, _CsxArray, _Local_Max, _RangeTuple2D
2221
from .._utils import _raise_module_load_error, _validate_n_threads, validate_threads
2322
from .._utils_rust import (
2423
GridCounts,
@@ -537,7 +536,6 @@ def assign_celltype(
537536
signatures: pd.DataFrame,
538537
*,
539538
log: bool = False,
540-
zarr_path: _PathLike | None = None,
541539
chunk: tuple[int, int] = (500, 500),
542540
):
543541
"""
@@ -553,9 +551,6 @@ def assign_celltype(
553551
log : bool
554552
Whether to log transform the KDE when calculating the cosine similarity.
555553
This is useful if the gene signatures are derived from log-transformed data.
556-
zarr_path : os.PathLike, str, or None
557-
If not `None` the cosine similarities for all cell types will be written to
558-
the specified path as zarr storage.
559554
chunk : tuple[int, int]
560555
Size of the chunks for processing. Larger chunks require more memory but
561556
have less duplicated computation.
@@ -568,13 +563,8 @@ def assign_celltype(
568563
If `self.kernel` is not set.
569564
ValueError
570565
If `chunk` is smaller than the shape of `self.kernel`.
571-
ValueError
572-
If `zarr_path` is not None and the celltype names contain
573-
illegal characters for file names.
574566
"""
575567

576-
ILLEGAL_CHARS = ["/", "\\"]
577-
578568
if not all(signatures.index.isin(self.genes)):
579569
raise ValueError(
580570
"Not all genes in the gene signature are part of this KDE."
@@ -591,16 +581,6 @@ def assign_celltype(
591581
celltypes = signatures.columns.tolist()
592582
ct_dtype = _get_cell_dtype(len(celltypes))
593583

594-
zarr_path = None if zarr_path is None else Path(zarr_path)
595-
596-
if zarr_path is not None and any(
597-
char in ct for char in ILLEGAL_CHARS for ct in celltypes
598-
):
599-
raise ValueError(
600-
"Celltype names contain at least one of the illegal characters: "
601-
f"{ILLEGAL_CHARS}"
602-
)
603-
604584
# scale signatures to unit norm
605585
signatures_mat = signatures.to_numpy()
606586
signatures_mat = (
@@ -614,11 +594,9 @@ def assign_celltype(
614594
self._cosine_similarity, self._assignment_score, self._celltype_map = fn(
615595
self.counts,
616596
genes,
617-
celltypes,
618597
signatures_mat,
619598
self.kernel,
620599
log=log,
621-
zarr_path=zarr_path,
622600
chunk_size=chunk,
623601
n_threads=self.n_threads,
624602
)

sainsc/lazykde/_utils.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
from collections.abc import Iterable
2-
from pathlib import Path
32
from typing import Protocol, TypeVar
43

54
import numpy as np
@@ -60,12 +59,10 @@ def __call__(
6059
self,
6160
counts: GridCounts,
6261
genes: list[str],
63-
celltypes: list[str],
6462
signatures: NDArray[np.float32],
6563
kernel: NDArray[np.float32],
6664
*,
6765
log: bool = ...,
68-
zarr_path: Path | None = None,
6966
chunk_size: tuple[int, int] = ...,
7067
n_threads: int | None = ...,
7168
) -> tuple[NDArray[np.float32], NDArray[np.float32], NDArray[np.signedinteger]]: ...

src/cosine.rs

Lines changed: 15 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
use crate::cosine_zarr::{initialize_cosine_zarrstore, write_cosine_to_zarr, ZarrChunkInfo};
21
use crate::gridcounts::GridCounts;
32
use crate::sparsekde::sparse_kde_csx_;
43
use crate::utils::create_pool;
@@ -13,23 +12,20 @@ use numpy::{IntoPyArray, PyArray2, PyReadonlyArray2};
1312
use pyo3::{exceptions::PyValueError, prelude::*};
1413
use rayon::prelude::*;
1514
use sprs::{CompressedStorage::CSR, CsMatI, CsMatViewI, SpIndex};
16-
use std::{cmp::min, error::Error, ops::Range, path::PathBuf};
17-
use zarrs::array::Element;
15+
use std::{cmp::min, error::Error, ops::Range};
1816

1917
macro_rules! build_cos_ct_fn {
2018
($name:tt, $t_cos:ty, $t_ct:ty) => {
2119
#[pyfunction]
22-
#[pyo3(signature = (counts, genes, celltypes, signatures, kernel, *, log=false, zarr_path=None, chunk_size=(500, 500), n_threads=None))]
20+
#[pyo3(signature = (counts, genes, signatures, kernel, *, log=false, chunk_size=(500, 500), n_threads=None))]
2321
/// calculate cosine similarity and assign celltype
2422
pub fn $name<'py>(
2523
py: Python<'py>,
2624
counts: &mut GridCounts,
2725
genes: Vec<String>,
28-
celltypes: Vec<String>,
2926
signatures: PyReadonlyArray2<'py, $t_cos>,
3027
kernel: PyReadonlyArray2<'py, $t_cos>,
3128
log: bool,
32-
zarr_path:Option<PathBuf>,
3329
chunk_size: (usize, usize),
3430
n_threads: Option<usize>,
3531
) -> PyResult<(
@@ -50,12 +46,10 @@ macro_rules! build_cos_ct_fn {
5046

5147
let cos_ct = chunk_and_calculate_cosine(
5248
&gene_counts,
53-
celltypes,
5449
signatures.as_array(),
5550
kernel.as_array(),
5651
counts.shape,
5752
log,
58-
zarr_path,
5953
chunk_size,
6054
n_threads
6155
);
@@ -77,19 +71,17 @@ build_cos_ct_fn!(cosinef32_and_celltypei16, f32, i16);
7771

7872
fn chunk_and_calculate_cosine<C, I, F, U>(
7973
counts: &[CsMatViewI<C, I>],
80-
celltypes: Vec<String>,
8174
signatures: ArrayView2<F>,
8275
kernel: ArrayView2<F>,
8376
shape: (usize, usize),
8477
log: bool,
85-
zarr_path: Option<PathBuf>,
8678
chunk_size: (usize, usize),
8779
n_threads: Option<usize>,
88-
) -> Result<(Array2<F>, Array2<F>, Array2<U>), Box<dyn Error + Send + Sync>>
80+
) -> Result<(Array2<F>, Array2<F>, Array2<U>), Box<dyn Error>>
8981
where
9082
C: NumCast + Copy + Sync + Send + Default,
9183
I: SpIndex + Signed + Sync + Send,
92-
F: NdFloat + Element,
84+
F: NdFloat,
9385
U: PrimInt + Signed + Sync + Send,
9486
Slice: From<Range<I>>,
9587
{
@@ -123,16 +115,7 @@ where
123115
}
124116
});
125117

126-
// init zarr store for celltypes with chunksize and all zero arrays
127-
let zarr_store = match zarr_path
128-
.map(|path| initialize_cosine_zarrstore(path, &celltypes, shape, chunk_size))
129-
{
130-
Some(Err(e)) => return Err(e),
131-
Some(Ok(store)) => Some(store),
132-
None => None,
133-
};
134-
135-
let celltyping_results = pool.install(|| {
118+
let ((cosine, score), celltype): ((Vec<_>, Vec<_>), Vec<_>) = pool.install(|| {
136119
// generate all chunk indices
137120
let chunk_indices: Vec<_> = (0..m).cartesian_product(0..n).collect();
138121

@@ -142,28 +125,18 @@ where
142125
.map(|idx| {
143126
let (chunk, unpad) = get_chunk(counts, idx, shape, chunk_size, pad);
144127

145-
let zarr_info = zarr_store.clone().map(|store| ZarrChunkInfo {
146-
store,
147-
celltypes: { celltypes.clone() },
148-
chunk_idx: vec![idx.0 as u64, idx.1 as u64],
149-
});
150-
151128
cosine_and_celltype_(
152129
chunk,
153130
signatures,
154131
&signature_similarity_correction,
155132
kernel,
156133
unpad,
157134
log,
158-
zarr_info,
159135
)
160136
})
161-
.collect::<Vec<_>>()
137+
.unzip()
162138
});
163139

164-
let ((cosine, score), celltype): ((Vec<_>, Vec<_>), Vec<_>) =
165-
itertools::process_results(celltyping_results, |iter| iter.unzip())?;
166-
167140
// concatenate all chunks back to original shape
168141
Ok((
169142
concat_2d(&cosine, n)?,
@@ -234,11 +207,10 @@ fn cosine_and_celltype_<C, I, F, U>(
234207
kernel: ArrayView2<F>,
235208
unpad: (Range<usize>, Range<usize>),
236209
log: bool,
237-
zarr_info: Option<ZarrChunkInfo>,
238-
) -> Result<((Array2<F>, Array2<F>), Array2<U>), Box<dyn Error + Send + Sync>>
210+
) -> ((Array2<F>, Array2<F>), Array2<U>)
239211
where
240212
C: NumCast + Copy,
241-
F: NdFloat + Element,
213+
F: NdFloat,
242214
U: PrimInt + Signed,
243215
I: SpIndex + Signed,
244216
Slice: From<Range<I>>,
@@ -253,10 +225,10 @@ where
253225
// fastpath if all csx are empty
254226
None => {
255227
let shape = (unpad_r.end - unpad_r.start, unpad_c.end - unpad_c.start);
256-
Ok((
228+
(
257229
(Array2::zeros(shape), Array2::zeros(shape)),
258230
Array2::from_elem(shape, -one::<U>()),
259-
))
231+
)
260232
}
261233
Some((csx, weights)) => {
262234
let shape = csx.shape();
@@ -290,24 +262,8 @@ where
290262
.filter(|(_, &w)| w != zero::<F>())
291263
.for_each(|(mut cos, &w)| cos += &kde_unpadded.map(|&x| x * w));
292264
}
293-
294-
kde_norm.mapv_inplace(F::sqrt);
295-
296-
if let Some(zarr_info) = zarr_info {
297-
write_cosine_to_zarr(
298-
zarr_info.store,
299-
&cosine,
300-
&kde_norm,
301-
&zarr_info.celltypes,
302-
&zarr_info.chunk_idx,
303-
)?
304-
};
305-
306-
Ok(get_max_cosine_and_celltype(
307-
cosine,
308-
kde_norm,
309-
pairwise_correction,
310-
))
265+
// TODO: write to zarr
266+
get_max_cosine_and_celltype(cosine, kde_norm, pairwise_correction)
311267
}
312268
}
313269
}
@@ -335,8 +291,9 @@ where
335291
*ct = -one::<I>();
336292
*s = zero();
337293
} else {
338-
*cos /= norm;
339-
*s /= norm;
294+
let norm_sqrt = norm.sqrt();
295+
*cos /= norm_sqrt;
296+
*s /= norm_sqrt;
340297
};
341298
});
342299

src/cosine_zarr.rs

Lines changed: 0 additions & 86 deletions
This file was deleted.

0 commit comments

Comments
 (0)