8686
# Captures, as the named group `count`, the first run of ASCII digits that follows a lazily matched prefix at the start of the string.
8787INSPECT_PARSER = re .compile (r'^.*?(?P<count>[0-9]+)' )
8888
def check_kwargs(kwargs):
    """Reject keyword arguments that CellSweep's denoiser does not accept.

    The accepted names are the parameters of the function stored in
    ``cellsweep.denoise_count_matrix.__wrapped__``.

    Validation is best-effort: when ``cellsweep`` cannot be imported or its
    signature cannot be introspected, the check is skipped and the function
    returns without raising.

    Args:
        kwargs: Mapping of extra keyword arguments; ``None`` or an empty
            mapping is accepted without any check.

    Raises:
        TypeError: If at least one key of ``kwargs`` is not a parameter of
            the introspected CellSweep function.
    """
    if not kwargs:
        return

    # Only the import and introspection are best-effort. The TypeError below
    # must be raised outside this `try`, otherwise a broad handler swallows
    # it and the validation silently does nothing.
    try:
        # Local imports: cellsweep is optional, and the alias avoids
        # shadowing by the `inspect` parameters used elsewhere in this file.
        import inspect as inspect_module
        import cellsweep
        signature = inspect_module.signature(
            cellsweep.denoise_count_matrix.__wrapped__
        )
    except Exception:
        return

    accepted_names = set(signature.parameters)
    invalid_params = [param for param in kwargs if param not in accepted_names]
    if invalid_params:
        raise TypeError(
            f"count() got an unexpected keyword argument(s): {', '.join(invalid_params)} "
        )
107-
def run_cellsweep(counts_dir, out_dir, threads=2, kwargs=None):
    """Run CellSweep's ``denoise_count_matrix`` on a kb count matrix.

    The matrix, barcodes and gene-name files are read from ``counts_dir``
    (``cells_x_genes.mtx``, ``cells_x_genes.barcodes.txt``,
    ``cells_x_genes.genes.names.txt``) and loaded with
    ``import_matrix_as_anndata``. Only the entries of ``kwargs`` whose names
    are parameters of ``cellsweep.denoise_count_matrix.__wrapped__`` are
    forwarded; the rest are dropped silently.

    Args:
        counts_dir: Directory holding the ``cells_x_genes.*`` files.
        out_dir: Base output directory; the result path is
            ``<out_dir>/counts_swept/swept_adata.h5ad``.
        threads: Value forwarded as ``threads``, defaults to `2`.
        kwargs: Optional mapping of extra CellSweep arguments.

    Returns:
        The path passed to CellSweep as ``adata_out``, or ``None`` when any
        step raised (the error is logged, not re-raised).
    """
    try:
        import inspect as inspect_module
        import cellsweep

        denoise_signature = inspect_module.signature(
            cellsweep.denoise_count_matrix.__wrapped__
        )
        accepted_names = list(denoise_signature.parameters.keys())

        # Forward only the options the denoiser declares.
        if kwargs:
            forwarded = {
                name: value
                for name, value in kwargs.items()
                if name in accepted_names
            }
        else:
            forwarded = {}

        swept_adata_path = os.path.join(
            os.path.join(out_dir, "counts_swept"), "swept_adata.h5ad"
        )

        adata = import_matrix_as_anndata(
            os.path.join(counts_dir, "cells_x_genes.mtx"),
            os.path.join(counts_dir, "cells_x_genes.barcodes.txt"),
            os.path.join(counts_dir, "cells_x_genes.genes.names.txt"),
        )

        # TODO:
        #   1. decide how to do automatic cell typing
        #   2. require expected_cells or umi_cutoff, or auto-detect them

        # PLACEHOLDER (to be erased): every observation gets a random label
        # until real cell typing is implemented.
        import numpy as np
        placeholder_labels = ["celltype1", "celltype2", "celltype3"]
        adata.obs["celltype"] = np.random.choice(
            placeholder_labels, size=adata.n_obs
        )

        cellsweep.denoise_count_matrix(
            adata=adata,
            adata_out=swept_adata_path,
            threads=threads,
            **forwarded
        )
    except Exception as e:
        logger.error(f"Error running cellsweep: {e} ")
        return None
    return swept_adata_path
14789
14890def make_transcript_t2g (txnames_path : str , out_path : str ) -> str :
14991 """Make a two-column t2g file from a transcripts file
@@ -1313,8 +1255,6 @@ def count(
13131255 quant_umis : bool = False ,
13141256 keep_flags : bool = False ,
13151257 exact_barcodes : bool = False ,
1316- remove_ambient : bool = False ,
1317- ** kwargs
13181258) -> Dict [str , Union [str , Dict [str , str ]]]:
13191259 """Generates count matrices for single-cell RNA seq.
13201260
@@ -1392,18 +1332,13 @@ def count(
13921332 quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
13931333 keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
13941334 exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
1395- remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
1396- **kwargs: Additional keyword arguments to pass to CellSweep
13971335
13981336 Returns:
13991337 Dictionary containing paths to generated files
14001338 """
14011339 STATS .start ()
14021340 is_batch = isinstance (fastqs , str )
14031341
1404- #* kwargs is only added for cellsweep, so check accordingly
1405- check_kwargs (kwargs )
1406-
14071342 results = {}
14081343 make_directory (out_dir )
14091344 unfiltered_results = results .setdefault ('unfiltered' , {})
@@ -1820,10 +1755,6 @@ def update_results_with_suffix(current_results, new_results, suffix):
18201755 temp_dir = temp_dir
18211756 )
18221757 unfiltered_results .update (report_result )
1823-
1824- if remove_ambient :
1825- logger .info ('Removing ambient RNA using CellSweep' )
1826- results ['swept_counts' ] = run_cellsweep (counts_dir = counts_dir , out_dir = out_dir , threads = threads , kwargs = kwargs )
18271758
18281759 # Delete intermediate BUS files if requested
18291760 if delete_bus :
@@ -1910,8 +1841,6 @@ def count_nac(
19101841 quant_umis : bool = False ,
19111842 keep_flags : bool = False ,
19121843 exact_barcodes : bool = False ,
1913- remove_ambient : bool = False ,
1914- ** kwargs
19151844) -> Dict [str , Union [Dict [str , str ], str ]]:
19161845 """Generates RNA velocity matrices for single-cell RNA seq.
19171846
@@ -1988,18 +1917,13 @@ def count_nac(
19881917 quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
19891918 keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
19901919 exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
1991- remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
1992- **kwargs: Additional keyword arguments to pass to CellSweep
19931920
19941921 Returns:
19951922 Dictionary containing path to generated index
19961923 """
19971924 STATS .start ()
19981925 is_batch = isinstance (fastqs , str )
19991926
2000- #* kwargs is only added for cellsweep, so check accordingly
2001- check_kwargs (kwargs )
2002-
20031927 results = {}
20041928 make_directory (out_dir )
20051929 unfiltered_results = results .setdefault ('unfiltered' , {})
@@ -2559,10 +2483,6 @@ def update_results_with_suffix(current_results, new_results, suffix):
25592483 logger .warning (
25602484 'Plots for TCC matrices have not yet been implemented. The HTML report will not contain any plots.'
25612485 )
2562-
2563- if remove_ambient :
2564- logger .info ('Removing ambient RNA using CellSweep' )
2565- results ['swept_counts' ] = run_cellsweep (counts_dir = counts_dir , out_dir = out_dir , threads = threads , kwargs = kwargs )
25662486
25672487 # Delete intermediate BUS files if requested
25682488 if delete_bus :
@@ -2621,8 +2541,6 @@ def count_velocity(
26212541 strand : Optional [Literal ['unstranded' , 'forward' , 'reverse' ]] = None ,
26222542 umi_gene : bool = False ,
26232543 em : bool = False ,
2624- remove_ambient : bool = False ,
2625- ** kwargs
26262544) -> Dict [str , Union [Dict [str , str ], str ]]:
26272545 """Generates RNA velocity matrices (DEPRECATED).
26282546
@@ -2670,8 +2588,7 @@ def count_velocity(
26702588 `False`
26712589 em: Whether to estimate gene abundances using EM algorithm, defaults to
26722590 `False`
2673- remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
2674- **kwargs: Additional keyword arguments to pass to CellSweep
2591+
26752592 Returns:
26762593 Dictionary containing path to generated index
26772594 """
@@ -2680,9 +2597,6 @@ def count_velocity(
26802597 BUS_CDNA_PREFIX = 'spliced'
26812598 BUS_INTRON_PREFIX = 'unspliced'
26822599
2683- #* kwargs is only added for cellsweep, so check accordingly
2684- check_kwargs (kwargs )
2685-
26862600 results = {}
26872601 make_directory (out_dir )
26882602 unfiltered_results = results .setdefault ('unfiltered' , {})
@@ -2979,10 +2893,6 @@ def count_velocity(
29792893 stats_path = STATS .save (os .path .join (out_dir , KB_INFO_FILENAME ))
29802894 results .update ({'stats' : stats_path })
29812895
2982- if remove_ambient :
2983- logger .info ('Removing ambient RNA using CellSweep' )
2984- results ['swept_counts' ] = run_cellsweep (counts_dir = counts_dir , out_dir = out_dir , threads = threads , kwargs = kwargs )
2985-
29862896 # Reports
29872897 nb_path = os .path .join (out_dir , REPORT_NOTEBOOK_FILENAME )
29882898 html_path = os .path .join (out_dir , REPORT_HTML_FILENAME )
@@ -3052,8 +2962,6 @@ def count_velocity_smartseq3(
30522962 by_name : bool = False ,
30532963 inspect : bool = True ,
30542964 strand : Optional [Literal ['unstranded' , 'forward' , 'reverse' ]] = None ,
3055- remove_ambient : bool = False ,
3056- ** kwargs
30572965) -> Dict [str , Union [str , Dict [str , str ]]]:
30582966 """Generates count matrices for Smartseq3 (DEPRECATED).
30592967
@@ -3080,8 +2988,6 @@ def count_velocity_smartseq3(
30802988 inspect: Whether or not to inspect the output BUS file and generate
30812989 the inspect.json
30822990 strand: Strandedness, defaults to `None`
3083- remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
3084- **kwargs: Additional keyword arguments to pass to CellSweep
30852991
30862992 Returns:
30872993 Dictionary containing paths to generated files
@@ -3091,9 +2997,6 @@ def count_velocity_smartseq3(
30912997 BUS_CDNA_PREFIX = 'spliced'
30922998 BUS_INTRON_PREFIX = 'unspliced'
30932999
3094- #* kwargs is only added for cellsweep, so check accordingly
3095- check_kwargs (kwargs )
3096-
30973000 results = {}
30983001 make_directory (out_dir )
30993002 unfiltered_results = results .setdefault ('unfiltered' , {})
0 commit comments