Skip to content

Commit b76a931

Browse files
authored
Merge pull request #291 from pachterlab/devel
Devel
2 parents c5f60fb + a0df33a commit b76a931

File tree

11 files changed

+99
-30
lines changed

11 files changed

+99
-30
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
- name: Setup python
1313
uses: actions/setup-python@v1
1414
with:
15-
python-version: '3.8'
15+
python-version: '3.9.22'
1616
architecture: x64
1717
- name: Install dependencies
1818
run: pip install -r dev-requirements.txt
@@ -22,7 +22,7 @@ jobs:
2222
runs-on: ubuntu-latest
2323
strategy:
2424
matrix:
25-
python: [3.8, 3.9 ]
25+
python: [3.9.22, 3.10.17 ]
2626
os: [ubuntu-20.04]
2727
name: Test on Python ${{ matrix.python }}
2828
steps:
88 Bytes
Binary file not shown.
88 Bytes
Binary file not shown.
-23.2 KB
Binary file not shown.
4.1 KB
Binary file not shown.

kb_python/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ class Technology(NamedTuple):
127127
'10XFB', '10x Feature Barcode',
128128
ngs.chemistry.get_chemistry('10xFBonly'), False
129129
),
130+
Technology('10XV4', '10x version 4', ngs.chemistry.get_chemistry('10xv4')),
130131
Technology('CELSEQ', 'CEL-Seq', ngs.chemistry.get_chemistry('celseq')),
131132
Technology(
132133
'CELSEQ2', 'CEL-SEQ version 2', ngs.chemistry.get_chemistry('celseq2')
@@ -168,7 +169,8 @@ class Technology(NamedTuple):
168169
),
169170
Technology('Visium', '10x Visium', ngs.chemistry.get_chemistry('visium')),
170171
Technology(
171-
'SPLIT-SEQ', 'SPLiT-seq', ngs.chemistry.get_chemistry('split-seq')
172+
'SPLIT-SEQ', 'SPLiT-seq (version 2)',
173+
ngs.chemistry.get_chemistry('split-seq')
172174
),
173175
],
174176
key=lambda t: t.name)

kb_python/count.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,23 @@
8787
INSPECT_PARSER = re.compile(r'^.*?(?P<count>[0-9]+)')
8888

8989

90+
def make_transcript_t2g(txnames_path: str, out_path: str) -> str:
91+
"""Make a two-column t2g file from a transcripts file
92+
93+
Args:
94+
txnames_path: Path to transcripts.txt
95+
out_path: Path to output t2g file
96+
97+
Returns:
98+
Path to output t2g file
99+
"""
100+
with open_as_text(txnames_path, 'r') as f, open_as_text(out_path,
101+
'w') as out:
102+
for line in f:
103+
out.write(f'{line.strip()}\t{line.strip()}\n')
104+
return out_path
105+
106+
90107
def kallisto_bus(
91108
fastqs: Union[List[str], str],
92109
index_path: str,
@@ -164,7 +181,11 @@ def kallisto_bus(
164181
command += ['-i', index_path]
165182
command += ['-o', out_dir]
166183
if not demultiplexed:
167-
command += ['-x', technology]
184+
if technology.upper() == "10XV4":
185+
# TODO: Remove this workaround (10XV4 is passed to kallisto as 10XV3) when kallisto is updated
186+
command += ['-x', "10XV3"]
187+
else:
188+
command += ['-x', technology]
168189
elif technology[0] == '-':
169190
# User supplied a custom demuxed (no-barcode) technology
170191
command += ['-x', technology]
@@ -412,7 +433,8 @@ def bustools_correct(
412433
bus_path: str,
413434
out_path: str,
414435
whitelist_path: str,
415-
replace: bool = False
436+
replace: bool = False,
437+
exact_barcodes: bool = False
416438
) -> Dict[str, str]:
417439
"""Runs `bustools correct`.
418440
@@ -421,6 +443,7 @@ def bustools_correct(
421443
out_path: Path to output corrected BUS file
422444
whitelist_path: Path to whitelist
423445
replace: If whitelist is a replacement file, defaults to `False`
446+
exact_barcodes: If `True`, pass `--nocorrect` so that only exact barcode matches are used, defaults to `False`
424447
425448
Returns:
426449
Dictionary containing path to generated index
@@ -436,6 +459,8 @@ def bustools_correct(
436459
command += [bus_path]
437460
if replace:
438461
command += ['--replace']
462+
if exact_barcodes:
463+
command += ['--nocorrect']
439464
run_executable(command)
440465
return {'bus': out_path}
441466

@@ -1214,6 +1239,7 @@ def count(
12141239
no_jump: bool = False,
12151240
quant_umis: bool = False,
12161241
keep_flags: bool = False,
1242+
exact_barcodes: bool = False,
12171243
) -> Dict[str, Union[str, Dict[str, str]]]:
12181244
"""Generates count matrices for single-cell RNA seq.
12191245
@@ -1286,6 +1312,7 @@ def count(
12861312
no_jump: Disable pseudoalignment "jumping", defaults to `False`
12871313
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
12881314
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1315+
exact_barcodes: Only use exact matches when matching barcodes to the on-list, defaults to `False`
12891316
12901317
Returns:
12911318
Dictionary containing paths to generated files
@@ -1349,6 +1376,10 @@ def count(
13491376
)
13501377
unfiltered_results.update(bus_result)
13511378

1379+
if t2g_path.upper() == "NONE":
1380+
tmp_t2g = os.path.join(temp_dir, "t2g.txt")
1381+
t2g_path = make_transcript_t2g(bus_result['txnames'], tmp_t2g)
1382+
13521383
sort_result = bustools_sort(
13531384
bus_result['bus'],
13541385
os.path.join(
@@ -1388,7 +1419,7 @@ def count(
13881419
update_filename(
13891420
os.path.basename(prev_result['bus']), CORRECT_CODE
13901421
)
1391-
), whitelist_path
1422+
), whitelist_path, False, exact_barcodes
13921423
)
13931424
prev_result = bustools_sort(
13941425
prev_result['bus'],
@@ -1757,6 +1788,7 @@ def count_nac(
17571788
no_jump: bool = False,
17581789
quant_umis: bool = False,
17591790
keep_flags: bool = False,
1791+
exact_barcodes: bool = False,
17601792
) -> Dict[str, Union[Dict[str, str], str]]:
17611793
"""Generates RNA velocity matrices for single-cell RNA seq.
17621794
@@ -1826,6 +1858,7 @@ def count_nac(
18261858
no_jump: Disable pseudoalignment "jumping", defaults to `False`
18271859
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
18281860
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
1861+
exact_barcodes: Only use exact matches when matching barcodes to the on-list, defaults to `False`
18291862
18301863
Returns:
18311864
Dictionary containing path to generated index
@@ -1886,6 +1919,10 @@ def count_nac(
18861919
)
18871920
unfiltered_results.update(bus_result)
18881921

1922+
if t2g_path.upper() == "NONE":
1923+
tmp_t2g = os.path.join(temp_dir, "t2g.txt")
1924+
t2g_path = make_transcript_t2g(bus_result['txnames'], tmp_t2g)
1925+
18891926
sort_result = bustools_sort(
18901927
bus_result['bus'],
18911928
os.path.join(
@@ -1926,7 +1963,7 @@ def count_nac(
19261963
update_filename(
19271964
os.path.basename(sort_result['bus']), CORRECT_CODE
19281965
)
1929-
), whitelist_path
1966+
), whitelist_path, False, exact_barcodes
19301967
)
19311968
prev_result = bustools_sort(
19321969
prev_result['bus'],

kb_python/main.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,8 @@ def parse_count(
639639
union=args.union,
640640
no_jump=args.no_jump,
641641
quant_umis=args.quant_umis,
642-
keep_flags=args.keep_flags
642+
keep_flags=args.keep_flags,
643+
exact_barcodes=args.exact_barcodes
643644
)
644645
elif args.workflow in {'nucleus', 'lamanno'}:
645646
# Smartseq can not be used with lamanno or nucleus.
@@ -762,7 +763,8 @@ def parse_count(
762763
union=args.union,
763764
no_jump=args.no_jump,
764765
quant_umis=args.quant_umis,
765-
keep_flags=args.keep_flags
766+
keep_flags=args.keep_flags,
767+
exact_barcodes=args.exact_barcodes
766768
)
767769

768770

@@ -1241,6 +1243,11 @@ def setup_count_args(
12411243
),
12421244
type=str
12431245
)
1246+
parser_count.add_argument(
1247+
'--exact-barcodes',
1248+
help=('Only exact matches are used for matching barcodes to on-list.'),
1249+
action='store_true'
1250+
)
12441251
parser_count.add_argument(
12451252
'-r',
12461253
metavar='REPLACEMENT',

setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,9 @@ def read(path):
3737
'Operating System :: POSIX :: Linux',
3838
'Operating System :: MacOS',
3939
'Operating System :: Microsoft :: Windows',
40-
'Programming Language :: Python :: 3.6',
41-
'Programming Language :: Python :: 3.7',
4240
'Programming Language :: Python :: 3.8',
4341
'Programming Language :: Python :: 3.9',
42+
'Programming Language :: Python :: 3.10',
4443
'Topic :: Scientific/Engineering :: Bio-Informatics',
4544
'Topic :: Utilities',
4645
],

tests/test_count.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,30 @@ def test_convert_transcripts_to_genes(self):
10271027
line.strip() for line in f if not line.isspace()
10281028
])
10291029

1030+
def test_make_transcript_t2g(self):
1031+
# Create a sample transcripts.txt
1032+
txnames_path = os.path.join(self.temp_dir, 'transcripts.txt')
1033+
with open(txnames_path, 'w') as f:
1034+
f.write('ENST00000335137.4\n')
1035+
f.write('ENST00000448914.6\n')
1036+
1037+
# Define output path
1038+
out_path = os.path.join(self.temp_dir, 't2g.txt')
1039+
1040+
# Call function
1041+
result_path = count.make_transcript_t2g(txnames_path, out_path)
1042+
1043+
# Check return value
1044+
self.assertEqual(result_path, out_path)
1045+
1046+
# Check file contents
1047+
with open(out_path, 'r') as f:
1048+
lines = [line.strip() for line in f if line.strip()]
1049+
self.assertEqual(lines, [
1050+
'ENST00000335137.4\tENST00000335137.4',
1051+
'ENST00000448914.6\tENST00000448914.6'
1052+
])
1053+
10301054
def test_matrix_to_cellranger(self):
10311055
out_dir = self.temp_dir
10321056
result = count.matrix_to_cellranger(
@@ -1156,7 +1180,7 @@ def test_count_with_whitelist(self):
11561180
)
11571181
copy_or_create_whitelist.assert_not_called()
11581182
bustools_correct.assert_called_once_with(
1159-
bus_s_path, bus_sc_path, self.whitelist_path
1183+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
11601184
)
11611185
bustools_count.assert_called_once_with(
11621186
bus_scs_path,
@@ -1295,7 +1319,7 @@ def test_count_report(self):
12951319
)
12961320
copy_or_create_whitelist.assert_not_called()
12971321
bustools_correct.assert_called_once_with(
1298-
bus_s_path, bus_sc_path, self.whitelist_path
1322+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
12991323
)
13001324
bustools_count.assert_called_once_with(
13011325
bus_scs_path,
@@ -1435,7 +1459,7 @@ def test_count_convert(self):
14351459
)
14361460
copy_or_create_whitelist.assert_not_called()
14371461
bustools_correct.assert_called_once_with(
1438-
bus_s_path, bus_sc_path, self.whitelist_path
1462+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
14391463
)
14401464
bustools_count.assert_called_once_with(
14411465
bus_scs_path,
@@ -1591,7 +1615,7 @@ def test_count_cellranger(self):
15911615
)
15921616
copy_or_create_whitelist.assert_not_called()
15931617
bustools_correct.assert_called_once_with(
1594-
bus_s_path, bus_sc_path, self.whitelist_path
1618+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
15951619
)
15961620
bustools_count.assert_called_once_with(
15971621
bus_scs_path,
@@ -1757,7 +1781,7 @@ def test_count_filter(self):
17571781
)
17581782
copy_or_create_whitelist.assert_not_called()
17591783
bustools_correct.assert_called_once_with(
1760-
bus_s_path, bus_sc_path, self.whitelist_path
1784+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
17611785
)
17621786
self.assertEqual(1, bustools_count.call_count)
17631787
bustools_count.assert_called_once_with(
@@ -1904,7 +1928,7 @@ def test_count_without_whitelist(self):
19041928
self.technology, bus_s_path, out_dir
19051929
)
19061930
bustools_correct.assert_called_once_with(
1907-
bus_s_path, bus_sc_path, self.whitelist_path
1931+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
19081932
)
19091933
bustools_count.assert_called_once_with(
19101934
bus_scs_path,
@@ -2030,7 +2054,7 @@ def test_count_kite_convert(self):
20302054
)
20312055
copy_or_create_whitelist.assert_not_called()
20322056
bustools_correct.assert_called_once_with(
2033-
bus_s_path, bus_sc_path, self.whitelist_path
2057+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
20342058
)
20352059
bustools_count.assert_called_once_with(
20362060
bus_scs_path,
@@ -2201,7 +2225,7 @@ def test_count_kite_filter(self):
22012225
)
22022226
copy_or_create_whitelist.assert_not_called()
22032227
bustools_correct.assert_called_once_with(
2204-
bus_s_path, bus_sc_path, self.whitelist_path
2228+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
22052229
)
22062230
self.assertEqual(1, bustools_count.call_count)
22072231
bustools_count.assert_called_once_with(
@@ -2367,7 +2391,7 @@ def test_count_kite_FB(self):
23672391
)
23682392
copy_or_create_whitelist.assert_not_called()
23692393
bustools_correct.assert_called_once_with(
2370-
bus_s_path, bus_sc_path, self.whitelist_path
2394+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
23712395
)
23722396
bustools_count.assert_called_once_with(
23732397
bus_scsps_path,
@@ -2503,7 +2527,7 @@ def test_count_bulk_multi_paired(self):
25032527
'SMARTSEQ2', bus_s_path, out_dir
25042528
)
25052529
bustools_correct.assert_called_once_with(
2506-
bus_s_path, bus_sc_path, self.whitelist_path
2530+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
25072531
)
25082532
bustools_count.assert_called_once_with(
25092533
bus_scs_path,
@@ -2658,7 +2682,7 @@ def test_count_bulk_multi_single(self):
26582682
'SMARTSEQ2', bus_s_path, out_dir
26592683
)
26602684
bustools_correct.assert_called_once_with(
2661-
bus_s_path, bus_sc_path, self.whitelist_path
2685+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
26622686
)
26632687
bustools_count.assert_called_once_with(
26642688
bus_scs_path,
@@ -4077,7 +4101,7 @@ def test_count_strand(self):
40774101
)
40784102
copy_or_create_whitelist.assert_not_called()
40794103
bustools_correct.assert_called_once_with(
4080-
bus_s_path, bus_sc_path, self.whitelist_path
4104+
bus_s_path, bus_sc_path, self.whitelist_path, False, False
40814105
)
40824106
bustools_count.assert_called_once_with(
40834107
bus_scs_path,

0 commit comments

Comments
 (0)