Skip to content

Commit 8fcb1e7

Browse files
committed
sweep removed from count.py and added to sweep.py
1 parent 1cda59f commit 8fcb1e7

File tree

3 files changed

+476
-288
lines changed

3 files changed

+476
-288
lines changed

kb_python/count.py

File mode changed: 100755 → 100644
Lines changed: 1 addition & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -86,64 +86,6 @@
8686

8787
INSPECT_PARSER = re.compile(r'^.*?(?P<count>[0-9]+)')
8888

89-
def check_kwargs(kwargs):
90-
if not kwargs:
91-
return
92-
try:
93-
import inspect as inspect_module
94-
import cellsweep
95-
sig = inspect_module.signature(cellsweep.denoise_count_matrix.__wrapped__)
96-
cellsweep_arg_names = list(sig.parameters.keys())
97-
if any(param not in cellsweep_arg_names for param in kwargs):
98-
invalid_params = [
99-
param for param in kwargs
100-
if param not in cellsweep_arg_names
101-
]
102-
raise TypeError(
103-
f"count() got an unexpected keyword argument(s): {', '.join(invalid_params)}"
104-
)
105-
except Exception as e:
106-
pass
107-
108-
def run_cellsweep(counts_dir, out_dir, threads=2, kwargs=None):
109-
try:
110-
import inspect as inspect_module
111-
import cellsweep
112-
sig = inspect_module.signature(cellsweep.denoise_count_matrix.__wrapped__)
113-
cellsweep_arg_names = list(sig.parameters.keys())
114-
cellsweep_kwargs = {}
115-
if kwargs:
116-
cellsweep_kwargs = {k: v for k, v in kwargs.items() if k in cellsweep_arg_names}
117-
cellsweep_counts_dir = os.path.join(out_dir, "counts_swept")
118-
cellsweep_adata_path = os.path.join(cellsweep_counts_dir, "swept_adata.h5ad")
119-
120-
matrix_path = os.path.join(counts_dir, "cells_x_genes.mtx")
121-
barcodes_path = os.path.join(counts_dir, "cells_x_genes.barcodes.txt")
122-
genes_path = os.path.join(counts_dir, "cells_x_genes.genes.names.txt")
123-
adata = import_matrix_as_anndata(matrix_path, barcodes_path, genes_path)
124-
# adata = cellsweep.utils.read_kb_mtx_as_adata(counts_dir)
125-
126-
# TODO:
127-
#* 1. think of how to do automatic celltyping
128-
#* 2. implement the requirement for expected_cells or umi_cutoff, or have a way to auto-detect
129-
130-
# add celltypes
131-
# adata = cs_utils.determine_cell_types(adata, model_pkl=model_pkl, filter_empty=True, expected_cells=expected_cells, celltypist_convert=celltypist_convert, celltypist_map_file=celltypist_map_file, verbose=verbose)
132-
133-
import numpy as np #!!! erase
134-
celltypes = ["celltype1", "celltype2", "celltype3"] #!!! erase
135-
adata.obs["celltype"] = np.random.choice(celltypes, size=adata.n_obs) #!!! erase
136-
137-
_ = cellsweep.denoise_count_matrix(
138-
adata=adata,
139-
adata_out=cellsweep_adata_path,
140-
threads=threads,
141-
**cellsweep_kwargs
142-
)
143-
return cellsweep_adata_path
144-
except Exception as e:
145-
logger.error(f"Error running cellsweep: {e}")
146-
return None
14789

14890
def make_transcript_t2g(txnames_path: str, out_path: str) -> str:
14991
"""Make a two-column t2g file from a transcripts file
@@ -1313,8 +1255,6 @@ def count(
13131255
quant_umis: bool = False,
13141256
keep_flags: bool = False,
13151257
exact_barcodes: bool = False,
1316-
remove_ambient: bool = False,
1317-
**kwargs
13181258
) -> Dict[str, Union[str, Dict[str, str]]]:
13191259
"""Generates count matrices for single-cell RNA seq.
13201260
@@ -1392,18 +1332,13 @@ def count(
13921332
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
13931333
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
13941334
exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
1395-
remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
1396-
**kwargs: Additional keyword arguments to pass to CellSweep
13971335
13981336
Returns:
13991337
Dictionary containing paths to generated files
14001338
"""
14011339
STATS.start()
14021340
is_batch = isinstance(fastqs, str)
14031341

1404-
#* kwargs is only added for cellsweep, so check accordingly
1405-
check_kwargs(kwargs)
1406-
14071342
results = {}
14081343
make_directory(out_dir)
14091344
unfiltered_results = results.setdefault('unfiltered', {})
@@ -1820,10 +1755,6 @@ def update_results_with_suffix(current_results, new_results, suffix):
18201755
temp_dir=temp_dir
18211756
)
18221757
unfiltered_results.update(report_result)
1823-
1824-
if remove_ambient:
1825-
logger.info('Removing ambient RNA using CellSweep')
1826-
results['swept_counts'] = run_cellsweep(counts_dir=counts_dir, out_dir=out_dir, threads=threads, kwargs=kwargs)
18271758

18281759
# Delete intermediate BUS files if requested
18291760
if delete_bus:
@@ -1910,8 +1841,6 @@ def count_nac(
19101841
quant_umis: bool = False,
19111842
keep_flags: bool = False,
19121843
exact_barcodes: bool = False,
1913-
remove_ambient: bool = False,
1914-
**kwargs
19151844
) -> Dict[str, Union[Dict[str, str], str]]:
19161845
"""Generates RNA velocity matrices for single-cell RNA seq.
19171846
@@ -1988,18 +1917,13 @@ def count_nac(
19881917
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
19891918
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
19901919
exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
1991-
remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
1992-
**kwargs: Additional keyword arguments to pass to CellSweep
19931920
19941921
Returns:
19951922
Dictionary containing path to generated index
19961923
"""
19971924
STATS.start()
19981925
is_batch = isinstance(fastqs, str)
19991926

2000-
#* kwargs is only added for cellsweep, so check accordingly
2001-
check_kwargs(kwargs)
2002-
20031927
results = {}
20041928
make_directory(out_dir)
20051929
unfiltered_results = results.setdefault('unfiltered', {})
@@ -2559,10 +2483,6 @@ def update_results_with_suffix(current_results, new_results, suffix):
25592483
logger.warning(
25602484
'Plots for TCC matrices have not yet been implemented. The HTML report will not contain any plots.'
25612485
)
2562-
2563-
if remove_ambient:
2564-
logger.info('Removing ambient RNA using CellSweep')
2565-
results['swept_counts'] = run_cellsweep(counts_dir=counts_dir, out_dir=out_dir, threads=threads, kwargs=kwargs)
25662486

25672487
# Delete intermediate BUS files if requested
25682488
if delete_bus:
@@ -2621,8 +2541,6 @@ def count_velocity(
26212541
strand: Optional[Literal['unstranded', 'forward', 'reverse']] = None,
26222542
umi_gene: bool = False,
26232543
em: bool = False,
2624-
remove_ambient: bool = False,
2625-
**kwargs
26262544
) -> Dict[str, Union[Dict[str, str], str]]:
26272545
"""Generates RNA velocity matrices (DEPRECATED).
26282546
@@ -2670,8 +2588,7 @@ def count_velocity(
26702588
`False`
26712589
em: Whether to estimate gene abundances using EM algorithm, defaults to
26722590
`False`
2673-
remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
2674-
**kwargs: Additional keyword arguments to pass to CellSweep
2591+
26752592
Returns:
26762593
Dictionary containing path to generated index
26772594
"""
@@ -2680,9 +2597,6 @@ def count_velocity(
26802597
BUS_CDNA_PREFIX = 'spliced'
26812598
BUS_INTRON_PREFIX = 'unspliced'
26822599

2683-
#* kwargs is only added for cellsweep, so check accordingly
2684-
check_kwargs(kwargs)
2685-
26862600
results = {}
26872601
make_directory(out_dir)
26882602
unfiltered_results = results.setdefault('unfiltered', {})
@@ -2979,10 +2893,6 @@ def count_velocity(
29792893
stats_path = STATS.save(os.path.join(out_dir, KB_INFO_FILENAME))
29802894
results.update({'stats': stats_path})
29812895

2982-
if remove_ambient:
2983-
logger.info('Removing ambient RNA using CellSweep')
2984-
results['swept_counts'] = run_cellsweep(counts_dir=counts_dir, out_dir=out_dir, threads=threads, kwargs=kwargs)
2985-
29862896
# Reports
29872897
nb_path = os.path.join(out_dir, REPORT_NOTEBOOK_FILENAME)
29882898
html_path = os.path.join(out_dir, REPORT_HTML_FILENAME)
@@ -3052,8 +2962,6 @@ def count_velocity_smartseq3(
30522962
by_name: bool = False,
30532963
inspect: bool = True,
30542964
strand: Optional[Literal['unstranded', 'forward', 'reverse']] = None,
3055-
remove_ambient: bool = False,
3056-
**kwargs
30572965
) -> Dict[str, Union[str, Dict[str, str]]]:
30582966
"""Generates count matrices for Smartseq3 (DEPRECATED).
30592967
@@ -3080,8 +2988,6 @@ def count_velocity_smartseq3(
30802988
inspect: Whether or not to inspect the output BUS file and generate
30812989
the inspect.json
30822990
strand: Strandedness, defaults to `None`
3083-
remove_ambient: Whether to remove ambient RNA using CellSweep, defaults to `False`
3084-
**kwargs: Additional keyword arguments to pass to CellSweep
30852991
30862992
Returns:
30872993
Dictionary containing paths to generated files
@@ -3091,9 +2997,6 @@ def count_velocity_smartseq3(
30912997
BUS_CDNA_PREFIX = 'spliced'
30922998
BUS_INTRON_PREFIX = 'unspliced'
30932999

3094-
#* kwargs is only added for cellsweep, so check accordingly
3095-
check_kwargs(kwargs)
3096-
30973000
results = {}
30983001
make_directory(out_dir)
30993002
unfiltered_results = results.setdefault('unfiltered', {})

0 commit comments

Comments
 (0)