Skip to content
Merged

Devel #291

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
- name: Setup python
uses: actions/setup-python@v1
with:
python-version: '3.8'
python-version: '3.9.22'
architecture: x64
- name: Install dependencies
run: pip install -r dev-requirements.txt
Expand All @@ -22,7 +22,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python: [3.8, 3.9 ]
python: [3.9.22, 3.10.17 ]
os: [ubuntu-20.04]
name: Test on Python ${{ matrix.python }}
steps:
Expand Down
Binary file modified kb_python/bins/darwin/bustools/bustools
Binary file not shown.
Binary file modified kb_python/bins/darwin/m1/bustools/bustools
Binary file not shown.
Binary file modified kb_python/bins/linux/bustools/bustools
Binary file not shown.
Binary file modified kb_python/bins/windows/bustools/bustools.exe
Binary file not shown.
4 changes: 3 additions & 1 deletion kb_python/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class Technology(NamedTuple):
'10XFB', '10x Feature Barcode',
ngs.chemistry.get_chemistry('10xFBonly'), False
),
Technology('10XV4', '10x version 4', ngs.chemistry.get_chemistry('10xv4')),
Technology('CELSEQ', 'CEL-Seq', ngs.chemistry.get_chemistry('celseq')),
Technology(
'CELSEQ2', 'CEL-SEQ version 2', ngs.chemistry.get_chemistry('celseq2')
Expand Down Expand Up @@ -168,7 +169,8 @@ class Technology(NamedTuple):
),
Technology('Visium', '10x Visium', ngs.chemistry.get_chemistry('visium')),
Technology(
'SPLIT-SEQ', 'SPLiT-seq', ngs.chemistry.get_chemistry('split-seq')
'SPLIT-SEQ', 'SPLiT-seq (version 2)',
ngs.chemistry.get_chemistry('split-seq')
),
],
key=lambda t: t.name)
Expand Down
45 changes: 41 additions & 4 deletions kb_python/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,23 @@
INSPECT_PARSER = re.compile(r'^.*?(?P<count>[0-9]+)')


def make_transcript_t2g(txnames_path: str, out_path: str) -> str:
    """Make a two-column t2g file from a transcripts file.

    Every transcript name read from `txnames_path` is mapped to itself: each
    output row holds the name twice, separated by a tab. Lines that are empty
    (or whitespace-only) after stripping are skipped so that no row with
    empty columns ends up in the t2g file.

    Args:
        txnames_path: Path to transcripts.txt
        out_path: Path to output t2g file

    Returns:
        Path to output t2g file
    """
    with open_as_text(txnames_path, 'r') as f, open_as_text(out_path,
                                                            'w') as out:
        for line in f:
            # Strip once; the same value is used for both columns.
            name = line.strip()
            if not name:
                # A blank line would otherwise produce a malformed row
                # consisting of a lone tab character.
                continue
            out.write(f'{name}\t{name}\n')
    return out_path


def kallisto_bus(
fastqs: Union[List[str], str],
index_path: str,
Expand Down Expand Up @@ -164,7 +181,11 @@
command += ['-i', index_path]
command += ['-o', out_dir]
if not demultiplexed:
command += ['-x', technology]
if technology.upper() == "10XV4":

Check warning on line 184 in kb_python/count.py

View check run for this annotation

Codecov / codecov/patch

kb_python/count.py#L184

Added line #L184 was not covered by tests
# TODO: REMOVE THIS WHEN KALLISTO IS UPDATED
command += ['-x', "10XV3"]

Check warning on line 186 in kb_python/count.py

View check run for this annotation

Codecov / codecov/patch

kb_python/count.py#L186

Added line #L186 was not covered by tests
else:
command += ['-x', technology]

Check warning on line 188 in kb_python/count.py

View check run for this annotation

Codecov / codecov/patch

kb_python/count.py#L188

Added line #L188 was not covered by tests
elif technology[0] == '-':
# User supplied a custom demuxed (no-barcode) technology
command += ['-x', technology]
Expand Down Expand Up @@ -412,7 +433,8 @@
bus_path: str,
out_path: str,
whitelist_path: str,
replace: bool = False
replace: bool = False,
exact_barcodes: bool = False
) -> Dict[str, str]:
"""Runs `bustools correct`.

Expand All @@ -421,6 +443,7 @@
out_path: Path to output corrected BUS file
whitelist_path: Path to whitelist
replace: If whitelist is a replacement file, defaults to `False`
exact_barcodes: Use exact matching for 'correction', defaults to `False`

Returns:
Dictionary containing path to generated index
Expand All @@ -436,6 +459,8 @@
command += [bus_path]
if replace:
command += ['--replace']
if exact_barcodes:
command += ['--nocorrect']

Check warning on line 463 in kb_python/count.py

View check run for this annotation

Codecov / codecov/patch

kb_python/count.py#L463

Added line #L463 was not covered by tests
run_executable(command)
return {'bus': out_path}

Expand Down Expand Up @@ -1214,6 +1239,7 @@
no_jump: bool = False,
quant_umis: bool = False,
keep_flags: bool = False,
exact_barcodes: bool = False,
) -> Dict[str, Union[str, Dict[str, str]]]:
"""Generates count matrices for single-cell RNA seq.

Expand Down Expand Up @@ -1286,6 +1312,7 @@
no_jump: Disable pseudoalignment "jumping", defaults to `False`
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`

Returns:
Dictionary containing paths to generated files
Expand Down Expand Up @@ -1349,6 +1376,10 @@
)
unfiltered_results.update(bus_result)

if t2g_path.upper() == "NONE":
tmp_t2g = os.path.join(temp_dir, "t2g.txt")
t2g_path = make_transcript_t2g(bus_result['txnames'], tmp_t2g)

Check warning on line 1381 in kb_python/count.py

View check run for this annotation

Codecov / codecov/patch

kb_python/count.py#L1380-L1381

Added lines #L1380 - L1381 were not covered by tests

sort_result = bustools_sort(
bus_result['bus'],
os.path.join(
Expand Down Expand Up @@ -1388,7 +1419,7 @@
update_filename(
os.path.basename(prev_result['bus']), CORRECT_CODE
)
), whitelist_path
), whitelist_path, False, exact_barcodes
)
prev_result = bustools_sort(
prev_result['bus'],
Expand Down Expand Up @@ -1757,6 +1788,7 @@
no_jump: bool = False,
quant_umis: bool = False,
keep_flags: bool = False,
exact_barcodes: bool = False,
) -> Dict[str, Union[Dict[str, str], str]]:
"""Generates RNA velocity matrices for single-cell RNA seq.

Expand Down Expand Up @@ -1826,6 +1858,7 @@
no_jump: Disable pseudoalignment "jumping", defaults to `False`
quant_umis: Whether to run quant-tcc when there are UMIs, defaults to `False`
keep_flags: Preserve flag column when sorting BUS file, defaults to `False`
exact_barcodes: Use exact match for 'correcting' barcodes to on-list, defaults to `False`

Returns:
Dictionary containing path to generated index
Expand Down Expand Up @@ -1886,6 +1919,10 @@
)
unfiltered_results.update(bus_result)

if t2g_path.upper() == "NONE":
tmp_t2g = os.path.join(temp_dir, "t2g.txt")
t2g_path = make_transcript_t2g(bus_result['txnames'], tmp_t2g)

Check warning on line 1924 in kb_python/count.py

View check run for this annotation

Codecov / codecov/patch

kb_python/count.py#L1922-L1924

Added lines #L1922 - L1924 were not covered by tests

sort_result = bustools_sort(
bus_result['bus'],
os.path.join(
Expand Down Expand Up @@ -1926,7 +1963,7 @@
update_filename(
os.path.basename(sort_result['bus']), CORRECT_CODE
)
), whitelist_path
), whitelist_path, False, exact_barcodes
)
prev_result = bustools_sort(
prev_result['bus'],
Expand Down
11 changes: 9 additions & 2 deletions kb_python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,8 @@ def parse_count(
union=args.union,
no_jump=args.no_jump,
quant_umis=args.quant_umis,
keep_flags=args.keep_flags
keep_flags=args.keep_flags,
exact_barcodes=args.exact_barcodes
)
elif args.workflow in {'nucleus', 'lamanno'}:
# Smartseq can not be used with lamanno or nucleus.
Expand Down Expand Up @@ -762,7 +763,8 @@ def parse_count(
union=args.union,
no_jump=args.no_jump,
quant_umis=args.quant_umis,
keep_flags=args.keep_flags
keep_flags=args.keep_flags,
exact_barcodes=args.exact_barcodes
)


Expand Down Expand Up @@ -1241,6 +1243,11 @@ def setup_count_args(
),
type=str
)
parser_count.add_argument(
'--exact-barcodes',
help=('Only exact matches are used for matching barcodes to on-list.'),
action='store_true'
)
parser_count.add_argument(
'-r',
metavar='REPLACEMENT',
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ def read(path):
'Operating System :: POSIX :: Linux',
'Operating System :: MacOS',
'Operating System :: Microsoft :: Windows',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Topic :: Scientific/Engineering :: Bio-Informatics',
'Topic :: Utilities',
],
Expand Down
48 changes: 36 additions & 12 deletions tests/test_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,30 @@ def test_convert_transcripts_to_genes(self):
line.strip() for line in f if not line.isspace()
])

def test_make_transcript_t2g(self):
    """A transcripts file is turned into a t2g mapping each name to itself."""
    source = os.path.join(self.temp_dir, 'transcripts.txt')
    target = os.path.join(self.temp_dir, 't2g.txt')

    # Write a minimal two-transcript input file.
    with open(source, 'w') as handle:
        handle.write('ENST00000335137.4\n')
        handle.write('ENST00000448914.6\n')

    # The function must hand back the output path it was given.
    returned = count.make_transcript_t2g(source, target)
    self.assertEqual(returned, target)

    # Collect the non-empty, stripped rows of the generated file.
    with open(target, 'r') as handle:
        rows = []
        for raw in handle:
            if raw.strip():
                rows.append(raw.strip())

    expected = [
        'ENST00000335137.4\tENST00000335137.4',
        'ENST00000448914.6\tENST00000448914.6'
    ]
    self.assertEqual(rows, expected)

def test_matrix_to_cellranger(self):
out_dir = self.temp_dir
result = count.matrix_to_cellranger(
Expand Down Expand Up @@ -1156,7 +1180,7 @@ def test_count_with_whitelist(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -1295,7 +1319,7 @@ def test_count_report(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -1435,7 +1459,7 @@ def test_count_convert(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -1591,7 +1615,7 @@ def test_count_cellranger(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -1757,7 +1781,7 @@ def test_count_filter(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
self.assertEqual(1, bustools_count.call_count)
bustools_count.assert_called_once_with(
Expand Down Expand Up @@ -1904,7 +1928,7 @@ def test_count_without_whitelist(self):
self.technology, bus_s_path, out_dir
)
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -2030,7 +2054,7 @@ def test_count_kite_convert(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -2201,7 +2225,7 @@ def test_count_kite_filter(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
self.assertEqual(1, bustools_count.call_count)
bustools_count.assert_called_once_with(
Expand Down Expand Up @@ -2367,7 +2391,7 @@ def test_count_kite_FB(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scsps_path,
Expand Down Expand Up @@ -2503,7 +2527,7 @@ def test_count_bulk_multi_paired(self):
'SMARTSEQ2', bus_s_path, out_dir
)
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -2658,7 +2682,7 @@ def test_count_bulk_multi_single(self):
'SMARTSEQ2', bus_s_path, out_dir
)
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down Expand Up @@ -4077,7 +4101,7 @@ def test_count_strand(self):
)
copy_or_create_whitelist.assert_not_called()
bustools_correct.assert_called_once_with(
bus_s_path, bus_sc_path, self.whitelist_path
bus_s_path, bus_sc_path, self.whitelist_path, False, False
)
bustools_count.assert_called_once_with(
bus_scs_path,
Expand Down
14 changes: 7 additions & 7 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_run_executable_no_wait(self):
with mock.patch('kb_python.utils.sp') as sp_mock:
sp_mock.Popen().returncode = 0
utils.run_executable(['echo', 'TEST'], wait=False)
sp_mock.Popen().poll.assert_not_called()
sp_mock.Popen().poll.assert_not_called()

def test_run_executable_with_stream(self):
with mock.patch('kb_python.utils.logger.debug') as debug_mock:
Expand Down Expand Up @@ -275,9 +275,9 @@ def test_collapse_anndata_by_index(self):
pd.testing.assert_index_equal(
pd.Index(['c', 'd'], name='gene_id'), collapsed.var.index
)
np.testing.assert_array_equal(np.array([[1, 2], [7, 5]]), collapsed.X.A)
np.testing.assert_array_equal(np.array([[1, 2], [7, 5]]), collapsed.X.toarray())
np.testing.assert_array_equal(
np.array([[13, 8], [19, 11]]), collapsed.layers['layer'].A
np.array([[13, 8], [19, 11]]), collapsed.layers['layer'].toarray()
)

def test_collapse_anndata_by_column(self):
Expand All @@ -303,9 +303,9 @@ def test_collapse_anndata_by_column(self):
pd.testing.assert_index_equal(
pd.Index(['e', 'f'], name='gene_name'), collapsed.var.index
)
np.testing.assert_array_equal(np.array([[0, 3], [3, 9]]), collapsed.X.A)
np.testing.assert_array_equal(np.array([[0, 3], [3, 9]]), collapsed.X.toarray())
np.testing.assert_array_equal(
np.array([[6, 15], [9, 21]]), collapsed.layers['layer'].A
np.array([[6, 15], [9, 21]]), collapsed.layers['layer'].toarray()
)

# def test_collapse_anndata_with_missing(self):
Expand All @@ -328,9 +328,9 @@ def test_collapse_anndata_by_column(self):
# pd.testing.assert_index_equal(
# pd.Index(['c'], name='gene_id'), collapsed.var.index
# )
# np.testing.assert_array_equal(np.array([[2], [8]]), collapsed.X.A)
# np.testing.assert_array_equal(np.array([[2], [8]]), collapsed.X.toarray())
# np.testing.assert_array_equal(
# np.array([[14], [20]]), collapsed.layers['layer'].A
# np.array([[14], [20]]), collapsed.layers['layer'].toarray()
# )

def test_create_10x_feature_barcode_map(self):
Expand Down