Skip to content

Commit bd63bfe

Browse files
committed
--exact-barcodes and -g None
1 parent 319b8e9 commit bd63bfe

File tree

2 files changed

+50
-5
lines changed

2 files changed

+50
-5
lines changed

kb_python/count.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,26 @@
8787
INSPECT_PARSER = re.compile(r'^.*?(?P<count>[0-9]+)')
8888

8989

90+
def make_transcript_t2g(
    txnames_path: str, out_path: str
) -> str:
    """Make a two-column t2g file from a transcripts file.

    Every non-empty line of the transcripts file is treated as one
    transcript name and written to the output as a tab-separated pair in
    which the transcript is mapped to itself.

    Args:
        txnames_path: Path to transcripts.txt
        out_path: Path to output t2g file

    Returns:
        Path to output t2g file
    """
    # NOTE: a previous revision called `read_t2g(t2g_path)` here; `t2g_path`
    # is not defined in this function (NameError) and the result was unused,
    # so the call has been removed.
    with open_as_text(txnames_path, 'r') as f, open_as_text(out_path,
                                                            'w') as out:
        for line in f:
            # The transcript name is the line without its trailing newline
            # or surrounding whitespace.
            transcript = line.strip()
            # Skip blank lines so no empty mapping rows are emitted.
            if not transcript:
                continue
            out.write(f'{transcript}\t{transcript}\n')
    return out_path
108+
109+
90110
def kallisto_bus(
91111
fastqs: Union[List[str], str],
92112
index_path: str,
@@ -412,7 +432,8 @@ def bustools_correct(
412432
bus_path: str,
413433
out_path: str,
414434
whitelist_path: str,
415-
replace: bool = False
435+
replace: bool = False,
436+
exact_barcodes: bool = False
416437
) -> Dict[str, str]:
417438
"""Runs `bustools correct`.
418439
@@ -421,6 +442,7 @@ def bustools_correct(
421442
out_path: Path to output corrected BUS file
422443
whitelist_path: Path to whitelist
423444
replace: If whitelist is a replacement file, defaults to `False`
445+
exact_barcodes: Use exact matching for 'correction', defaults to `False`
424446
425447
Returns:
426448
Dictionary containing path to generated index
@@ -436,6 +458,8 @@ def bustools_correct(
436458
command += [bus_path]
437459
if replace:
438460
command += ['--replace']
461+
if exact_barcodes:
462+
command += ['--nocorrect']
439463
run_executable(command)
440464
return {'bus': out_path}
441465

@@ -1214,6 +1238,7 @@ def count(
12141238
no_jump: bool = False,
12151239
quant_umis: bool = False,
12161240
keep_flags: bool = False,
1241+
exact_barcodes: bool = False,
12171242
) -> Dict[str, Union[str, Dict[str, str]]]:
12181243
"""Generates count matrices for single-cell RNA seq.
12191244
@@ -1286,6 +1311,7 @@ def count(
12861311
no_jump: Disable pseudoalignment "jumping", defaults to `False`
12871312
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
12881313
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1314+
exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
12891315
12901316
Returns:
12911317
Dictionary containing paths to generated files
@@ -1349,6 +1375,10 @@ def count(
13491375
)
13501376
unfiltered_results.update(bus_result)
13511377

1378+
if t2g_path.upper() == "NONE":
1379+
tmp_t2g = os.path.join(temp_dir, "t2g.txt")
1380+
t2g_path = make_transcript_t2g(bus_result['txnames'], tmp_t2g)
1381+
13521382
sort_result = bustools_sort(
13531383
bus_result['bus'],
13541384
os.path.join(
@@ -1388,7 +1418,7 @@ def count(
13881418
update_filename(
13891419
os.path.basename(prev_result['bus']), CORRECT_CODE
13901420
)
1391-
), whitelist_path
1421+
), whitelist_path, False, exact_barcodes
13921422
)
13931423
prev_result = bustools_sort(
13941424
prev_result['bus'],
@@ -1757,6 +1787,7 @@ def count_nac(
17571787
no_jump: bool = False,
17581788
quant_umis: bool = False,
17591789
keep_flags: bool = False,
1790+
exact_barcodes: bool = False,
17601791
) -> Dict[str, Union[Dict[str, str], str]]:
17611792
"""Generates RNA velocity matrices for single-cell RNA seq.
17621793
@@ -1826,6 +1857,7 @@ def count_nac(
18261857
no_jump: Disable pseudoalignment "jumping", defaults to `False`
18271858
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
18281859
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1860+
exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`
18291861
18301862
Returns:
18311863
Dictionary containing path to generated index
@@ -1886,6 +1918,10 @@ def count_nac(
18861918
)
18871919
unfiltered_results.update(bus_result)
18881920

1921+
if t2g_path.upper() == "NONE":
1922+
tmp_t2g = os.path.join(temp_dir, "t2g.txt")
1923+
t2g_path = make_transcript_t2g(bus_result['txnames'], tmp_t2g)
1924+
18891925
sort_result = bustools_sort(
18901926
bus_result['bus'],
18911927
os.path.join(
@@ -1926,7 +1962,7 @@ def count_nac(
19261962
update_filename(
19271963
os.path.basename(sort_result['bus']), CORRECT_CODE
19281964
)
1929-
), whitelist_path
1965+
), whitelist_path, False, exact_barcodes
19301966
)
19311967
prev_result = bustools_sort(
19321968
prev_result['bus'],

kb_python/main.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,8 @@ def parse_count(
639639
union=args.union,
640640
no_jump=args.no_jump,
641641
quant_umis=args.quant_umis,
642-
keep_flags=args.keep_flags
642+
keep_flags=args.keep_flags,
643+
exact_barcodes=args.exact_barcodes
643644
)
644645
elif args.workflow in {'nucleus', 'lamanno'}:
645646
# Smartseq can not be used with lamanno or nucleus.
@@ -762,7 +763,8 @@ def parse_count(
762763
union=args.union,
763764
no_jump=args.no_jump,
764765
quant_umis=args.quant_umis,
765-
keep_flags=args.keep_flags
766+
keep_flags=args.keep_flags,
767+
exact_barcodes=args.exact_barcodes
766768
)
767769

768770

@@ -1241,6 +1243,13 @@ def setup_count_args(
12411243
),
12421244
type=str
12431245
)
1246+
parser_count.add_argument(
1247+
'--exact-barcodes',
1248+
help=(
1249+
'Only exact matches are used for matching barcodes to on-list.'
1250+
),
1251+
action='store_true'
1252+
)
12441253
parser_count.add_argument(
12451254
'-r',
12461255
metavar='REPLACEMENT',

0 commit comments

Comments
 (0)