8787INSPECT_PARSER = re .compile (r'^.*?(?P<count>[0-9]+)' )
8888
8989
def make_transcript_t2g(txnames_path: str, out_path: str) -> str:
    """Make a two-column t2g file from a transcripts file.

    Every non-blank line of the transcripts file is treated as one transcript
    name and is written to the output as ``<name>\\t<name>``, i.e. each
    transcript is mapped to itself. Callers use this when no real
    transcript-to-gene mapping was supplied (``t2g_path`` given as "NONE").

    Args:
        txnames_path: Path to transcripts.txt
        out_path: Path to output t2g file

    Returns:
        Path to output t2g file
    """
    with open_as_text(txnames_path, 'r') as f, \
            open_as_text(out_path, 'w') as out:
        for line in f:
            # Drop the trailing newline (and any surrounding whitespace) so
            # the name is not split across two output rows.
            transcript = line.strip()
            if not transcript:
                # Blank lines would otherwise produce an empty "\t" row.
                continue
            out.write(f'{transcript}\t{transcript}\n')
    return out_path
108+
109+
90110def kallisto_bus (
91111 fastqs : Union [List [str ], str ],
92112 index_path : str ,
@@ -412,7 +432,8 @@ def bustools_correct(
412432 bus_path : str ,
413433 out_path : str ,
414434 whitelist_path : str ,
415- replace : bool = False
435+ replace : bool = False ,
436+ exact_barcodes : bool = False
416437) -> Dict [str , str ]:
417438 """Runs `bustools correct`.
418439
@@ -421,6 +442,7 @@ def bustools_correct(
421442 out_path: Path to output corrected BUS file
422443 whitelist_path: Path to whitelist
423444 replace: If whitelist is a replacement file, defaults to `False`
445+ exact_barcodes: Use exact matching for 'correction', defaults to `False`
424446
425447 Returns:
426448 Dictionary containing path to generated index
@@ -436,6 +458,8 @@ def bustools_correct(
436458 command += [bus_path ]
437459 if replace :
438460 command += ['--replace' ]
461+ if exact_barcodes :
462+ command += ['--nocorrect' ]
439463 run_executable (command )
440464 return {'bus' : out_path }
441465
@@ -1214,6 +1238,7 @@ def count(
12141238 no_jump : bool = False ,
12151239 quant_umis : bool = False ,
12161240 keep_flags : bool = False ,
1241+ exact_barcodes : bool = False ,
12171242) -> Dict [str , Union [str , Dict [str , str ]]]:
12181243 """Generates count matrices for single-cell RNA seq.
12191244
@@ -1286,6 +1311,7 @@ def count(
12861311 no_jump: Disable pseudoalignment "jumping", defaults to `False`
12871312 quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
12881313 keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1314+ exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
12891315
12901316 Returns:
12911317 Dictionary containing paths to generated files
@@ -1349,6 +1375,10 @@ def count(
13491375 )
13501376 unfiltered_results .update (bus_result )
13511377
1378+ if t2g_path .upper () == "NONE" :
1379+ tmp_t2g = os .path .join (temp_dir , "t2g.txt" )
1380+ t2g_path = make_transcript_t2g (bus_result ['txnames' ], tmp_t2g )
1381+
13521382 sort_result = bustools_sort (
13531383 bus_result ['bus' ],
13541384 os .path .join (
@@ -1388,7 +1418,7 @@ def count(
13881418 update_filename (
13891419 os .path .basename (prev_result ['bus' ]), CORRECT_CODE
13901420 )
1391- ), whitelist_path
1421+ ), whitelist_path , False , exact_barcodes
13921422 )
13931423 prev_result = bustools_sort (
13941424 prev_result ['bus' ],
@@ -1757,6 +1787,7 @@ def count_nac(
17571787 no_jump : bool = False ,
17581788 quant_umis : bool = False ,
17591789 keep_flags : bool = False ,
1790+ exact_barcodes : bool = False ,
17601791) -> Dict [str , Union [Dict [str , str ], str ]]:
17611792 """Generates RNA velocity matrices for single-cell RNA seq.
17621793
@@ -1826,6 +1857,7 @@ def count_nac(
18261857 no_jump: Disable pseudoalignment "jumping", defaults to `False`
18271858 quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
18281859 keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1860+ exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
18291861
18301862 Returns:
18311863 Dictionary containing path to generated index
@@ -1886,6 +1918,10 @@ def count_nac(
18861918 )
18871919 unfiltered_results .update (bus_result )
18881920
1921+ if t2g_path .upper () == "NONE" :
1922+ tmp_t2g = os .path .join (temp_dir , "t2g.txt" )
1923+ t2g_path = make_transcript_t2g (bus_result ['txnames' ], tmp_t2g )
1924+
18891925 sort_result = bustools_sort (
18901926 bus_result ['bus' ],
18911927 os .path .join (
@@ -1926,7 +1962,7 @@ def count_nac(
19261962 update_filename (
19271963 os .path .basename (sort_result ['bus' ]), CORRECT_CODE
19281964 )
1929- ), whitelist_path
1965+ ), whitelist_path , False , exact_barcodes
19301966 )
19311967 prev_result = bustools_sort (
19321968 prev_result ['bus' ],
0 commit comments