55
66from __future__ import annotations
77
8+ from itertools import product
89from typing import TYPE_CHECKING
910
11+ import anndata as ad
12+
1013import scanpy as sc
1114
1215from ._utils import get_count_dataset
1316
1417if TYPE_CHECKING :
15- from anndata import AnnData
16-
1718 from ._utils import Dataset , KeyCount
1819
19- # setup variables
20-
21- adata : AnnData
22- batch_key : str | None
23-
24-
25- def setup (dataset : Dataset , layer : KeyCount , * _ ):
26- """Set up global variables before each benchmark."""
27- global adata , batch_key
28- adata , batch_key = get_count_dataset (dataset , layer = layer )
29- assert "log1p" not in adata .uns
30-
3120
3221# ASV suite
22+ class PreprocessingCountsSuite : # noqa: D101
23+ params : tuple [list [Dataset ], list [KeyCount ]] = (
24+ ["pbmc68k_reduced" , "pbmc3k" ],
25+ ["counts" , "counts-off-axis" ],
26+ )
27+ param_names = ("dataset" , "layer" )
3328
34- params : tuple [list [Dataset ], list [KeyCount ]] = (
35- ["pbmc68k_reduced" , "pbmc3k" ],
36- ["counts" , "counts-off-axis" ],
37- )
38- param_names = ["dataset" , "layer" ]
39-
40-
41- def time_filter_cells (* _ ):
42- sc .pp .filter_cells (adata , min_genes = 100 )
43-
44-
45- def peakmem_filter_cells (* _ ):
46- sc .pp .filter_cells (adata , min_genes = 100 )
47-
48-
49- def time_filter_genes (* _ ):
50- sc .pp .filter_genes (adata , min_cells = 3 )
51-
def setup_cache(self) -> None:
    """Write one ``.h5ad`` file per (dataset, layer) parameter combination.

    Without this caching, asv was running several processes, which meant
    the data was repeatedly downloaded.
    """
    for dataset, layer in product(*self.params):
        adata, batch_key = get_count_dataset(dataset, layer=layer)
        # Sanity check on the freshly loaded counts: no "log1p" entry yet.
        # (The key was previously misspelled "lop1p", so the check could never fail.)
        assert "log1p" not in adata.uns
        # Keep the batch key inside the object so it is written to the file
        # and can be read back from ``uns`` by the scrublet benchmarks.
        adata.uns["batch_key"] = batch_key
        # Relative path: the file lands in the current working directory.
        adata.write_h5ad(f"{ dataset } _{ layer } .h5ad")
5236
53- def peakmem_filter_genes ( * _ ) :
54- sc . pp . filter_genes ( adata , min_cells = 3 )
def setup(self, dataset, layer) -> None:
    """Load the cached AnnData for this (dataset, layer) into ``self.adata``.

    The file name must be exactly the one ``setup_cache`` writes for the same
    parameters; otherwise the read cannot find the cached file.
    """
    self.adata = ad.read_h5ad(f"{ dataset } _{ layer } .h5ad")
5539
def time_filter_cells(self, *_) -> None:
    """Timing benchmark: ``sc.pp.filter_cells`` with ``min_genes=100``."""
    # Benchmark parameters arrive positionally and are ignored; the data was loaded in ``setup``.
    adata = self.adata
    sc.pp.filter_cells(adata, min_genes=100)
5642
57- def time_scrublet ( * _ ):
58- sc .pp .scrublet ( adata , batch_key = batch_key )
def peakmem_filter_cells(self, *_) -> None:
    """Peak-memory benchmark: ``sc.pp.filter_cells`` with ``min_genes=100``."""
    # Benchmark parameters arrive positionally and are ignored; the data was loaded in ``setup``.
    adata = self.adata
    sc.pp.filter_cells(adata, min_genes=100)
5945
def time_filter_genes(self, *_) -> None:
    """Timing benchmark: ``sc.pp.filter_genes`` with ``min_cells=3``."""
    # Benchmark parameters arrive positionally and are ignored; the data was loaded in ``setup``.
    adata = self.adata
    sc.pp.filter_genes(adata, min_cells=3)
6048
61- def peakmem_scrublet ( * _ ):
62- sc .pp .scrublet ( adata , batch_key = batch_key )
def peakmem_filter_genes(self, *_) -> None:
    """Peak-memory benchmark: ``sc.pp.filter_genes`` with ``min_cells=3``."""
    # Benchmark parameters arrive positionally and are ignored; the data was loaded in ``setup``.
    adata = self.adata
    sc.pp.filter_genes(adata, min_cells=3)
6351
def time_scrublet(self, *_) -> None:
    """Timing benchmark: ``sc.pp.scrublet`` using the batch key stored in ``uns``."""
    adata = self.adata
    # The batch key is read from the loaded object's ``uns["batch_key"]`` entry.
    batch_key = adata.uns["batch_key"]
    sc.pp.scrublet(adata, batch_key=batch_key)
6454
65- # Can’t do seurat v3 yet: https://github.com/conda-forge/scikit-misc-feedstock/issues/17
66- """
67- def time_hvg_seurat_v3(*_):
68- # seurat v3 runs on counts
69- sc.pp.highly_variable_genes(adata, flavor="seurat_v3_paper")
def peakmem_scrublet(self, *_) -> None:
    """Peak-memory benchmark: ``sc.pp.scrublet`` using the batch key stored in ``uns``."""
    adata = self.adata
    # The batch key is read from the loaded object's ``uns["batch_key"]`` entry.
    batch_key = adata.uns["batch_key"]
    sc.pp.scrublet(adata, batch_key=batch_key)
7057
58+ # scikit-misc does not exist on osx-arm64
59+ # https://github.com/conda-forge/scikit-misc-feedstock/pull/29
60+ # def time_hvg_seurat_v3(self, *_):
61+ # # seurat v3 runs on counts
62+ # sc.pp.highly_variable_genes(self.adata, flavor="seurat_v3_paper")
7163
72- def peakmem_hvg_seurat_v3(*_):
73- sc.pp.highly_variable_genes(adata, flavor="seurat_v3_paper")
74- """
64+ # def peakmem_hvg_seurat_v3(self, *_):
65+ # sc.pp.highly_variable_genes(self.adata, flavor="seurat_v3_paper")
7566
7667
7768class FastSuite :
@@ -83,28 +74,38 @@ class FastSuite:
8374 )
8475 param_names = ("dataset" , "layer" )
8576
86- def time_calculate_qc_metrics (self , * _ ):
def setup_cache(self) -> None:
    """Write one ``.h5ad`` file per (dataset, layer) parameter combination.

    Without this caching, asv was running several processes, which meant
    the data was repeatedly downloaded.
    """
    for dataset, layer in product(*self.params):
        # The second return value (the batch key) is not needed by this suite.
        adata, _ = get_count_dataset(dataset, layer=layer)
        # Sanity check on the freshly loaded counts: no "log1p" entry yet.
        # (The key was previously misspelled "lop1p", so the check could never fail.)
        assert "log1p" not in adata.uns
        # Relative path: the file lands in the current working directory.
        adata.write_h5ad(f"{ dataset } _{ layer } .h5ad")
83+
def setup(self, dataset, layer) -> None:
    """Load the cached AnnData for this (dataset, layer) into ``self.adata``."""
    # Same file name that ``setup_cache`` writes for these parameters.
    cached_file = f"{ dataset } _{ layer } .h5ad"
    self.adata = ad.read_h5ad(cached_file)
86+
def time_calculate_qc_metrics(self, *_) -> None:
    """Timing benchmark: ``sc.pp.calculate_qc_metrics`` with ``"mt"`` as the only QC variable."""
    # Options spelled out once, then forwarded unchanged as keyword arguments.
    options = {
        "qc_vars": ["mt"],
        "percent_top": None,
        "log1p": False,
        "inplace": True,
    }
    sc.pp.calculate_qc_metrics(self.adata, **options)
9091
91- def peakmem_calculate_qc_metrics (self , * _ ):
def peakmem_calculate_qc_metrics(self, *_) -> None:
    """Peak-memory benchmark: ``sc.pp.calculate_qc_metrics`` with ``"mt"`` as the only QC variable."""
    # Options spelled out once, then forwarded unchanged as keyword arguments.
    options = {
        "qc_vars": ["mt"],
        "percent_top": None,
        "log1p": False,
        "inplace": True,
    }
    sc.pp.calculate_qc_metrics(self.adata, **options)
9596
96- def time_normalize_total (self , * _ ):
97- sc .pp .normalize_total (adata , target_sum = 1e4 )
def time_normalize_total(self, *_) -> None:
    """Timing benchmark: ``sc.pp.normalize_total`` with ``target_sum=1e4``."""
    adata = self.adata
    sc.pp.normalize_total(adata, target_sum=1e4)
9899
99- def peakmem_normalize_total (self , * _ ):
100- sc .pp .normalize_total (adata , target_sum = 1e4 )
def peakmem_normalize_total(self, *_) -> None:
    """Peak-memory benchmark: ``sc.pp.normalize_total`` with ``target_sum=1e4``."""
    adata = self.adata
    sc.pp.normalize_total(adata, target_sum=1e4)
101102
102- def time_log1p (self , * _ ):
103- # TODO: This would fail: assert "log1p" not in adata.uns, "ASV bug?"
def time_log1p(self, *_) -> None:
    """Timing benchmark: ``sc.pp.log1p`` after dropping any existing ``uns["log1p"]`` entry."""
    adata = self.adata
    # TODO: This would fail: assert "log1p" not in self.adata.uns, "ASV bug?"
    # https://github.com/scverse/scanpy/issues/3052
    adata.uns.pop("log1p", None)
    sc.pp.log1p(adata)
107108
108- def peakmem_log1p (self , * _ ):
109- adata .uns .pop ("log1p" , None )
110- sc .pp .log1p (adata )
def peakmem_log1p(self, *_) -> None:
    """Peak-memory benchmark: ``sc.pp.log1p`` after dropping any existing ``uns["log1p"]`` entry."""
    adata = self.adata
    # Remove a leftover "log1p" entry, if any, before running the transform.
    adata.uns.pop("log1p", None)
    sc.pp.log1p(adata)
0 commit comments