1 file changed: +12 -3 lines changed
Columns: original file line number | diff line number | diff line change
@@ -43,7 +43,7 @@ def try_import(module_name):
4343from datetime import timedelta
4444
4545
46- def filter_transcripts (
46+ def filter_transcripts ( #ONLY FOR XENIUM
4747 transcripts_df : pd .DataFrame ,
4848 min_qv : float = 20.0 ,
4949) -> pd .DataFrame :
@@ -65,8 +65,17 @@ def filter_transcripts(
6565 "DeprecatedCodeword_" ,
6666 "UnassignedCodeword_" ,
6767 )
68- mask = transcripts_df ["qv" ].ge (min_qv )
69- mask &= ~ transcripts_df ["feature_name" ].str .startswith (filter_codewords )
68+
69+ transcripts_df ['feature_name' ] = transcripts_df ['feature_name' ].apply (
70+ lambda x : x .decode ("utf-8" ) if isinstance (x , bytes ) else x
71+ )
72+ mask_quality = transcripts_df ['qv' ] >= min_qv
73+
74+ # Apply the filter for unwanted codewords using Dask string functions
75+ mask_codewords = ~ transcripts_df ['feature_name' ].str .startswith (filter_codewords )
76+
77+ # Combine the filters and return the filtered Dask DataFrame
78+ mask = mask_quality & mask_codewords
7079 return transcripts_df [mask ]
7180
7281
You can’t perform that action at this time.
0 commit comments