|
| 1 | +import pathlib |
| 2 | +import shutil |
| 3 | +import gzip |
| 4 | + |
1 | 5 | import numpy as np
|
2 | 6 | import pytest
|
3 | 7 | from numpy.testing import assert_array_equal
|
4 | 8 |
|
5 |
| -# FIXME - quick hacks here to get tests working |
6 | 9 | from sc2ts import alignments as sa
|
7 | 10 | from sc2ts import core
|
8 | 11 |
|
9 | 12 |
|
@pytest.fixture
def data_cache():
    """Return the directory used to cache derived test data.

    The directory is the relative path ``tests/data/cache`` (resolved against
    the working directory of the test run) and is created if it is missing.
    """
    cache_path = pathlib.Path("tests/data/cache")
    # exist_ok=True replaces the separate exists() check: there is no window
    # between testing and creating in which another process could create the
    # directory and make mkdir() fail.
    cache_path.mkdir(exist_ok=True)
    return cache_path
| 19 | + |
| 20 | + |
@pytest.fixture
def alignments_fasta(data_cache):
    """Return the path of the decompressed alignments FASTA file.

    ``tests/data/alignments.fasta.gz`` is decompressed into ``data_cache`` as
    ``alignments.fasta`` the first time it is needed; later calls reuse the
    existing file.
    """
    cache_path = data_cache / "alignments.fasta"
    if not cache_path.exists():
        # Decompress into a scratch file and rename it only once the copy has
        # finished, so an interrupted run cannot leave a truncated file that
        # later runs would mistake for a complete cache entry.
        partial_path = data_cache / "alignments.fasta.partial"
        with gzip.open("tests/data/alignments.fasta.gz") as src:
            with open(partial_path, "wb") as dest:
                shutil.copyfileobj(src, dest)
        partial_path.replace(cache_path)
    return cache_path
| 29 | + |
| 30 | + |
@pytest.fixture
def alignments_store(data_cache, alignments_fasta):
    """Yield an ``AlignmentStore`` opened on the cached ``alignments.db``.

    If the database file does not exist yet it is first built by appending
    the contents of ``alignments_fasta`` to a store opened in ``"a"`` mode.
    The store handed to the test is opened with the default mode.
    """
    cache_path = data_cache / "alignments.db"
    if not cache_path.exists():
        with sa.AlignmentStore(cache_path, "a") as a:
            fasta = core.FastaReader(alignments_fasta)
            a.append(fasta, show_progress=False)
    # Hand the store out from inside its context manager so that it is
    # released at fixture teardown rather than left open for the remainder
    # of the test session.
    with sa.AlignmentStore(cache_path) as store:
        yield store
| 39 | + |
| 40 | + |
class TestAlignmentsStore:
    """Checks on the store supplied by the ``alignments_store`` fixture."""

    def test_info(self, alignments_store):
        # The string form of the store is expected to mention "contains".
        summary = str(alignments_store)
        assert "contains" in summary

    def test_len(self, alignments_store):
        # The test data set is pinned at 55 entries.
        num_entries = len(alignments_store)
        assert num_entries == 55

    def test_fetch_known(self, alignments_store):
        # Look up one known key and spot-check its shape and end values.
        alignment = alignments_store["SRR11772659"]
        expected_shape = (core.REFERENCE_SEQUENCE_LENGTH,)
        assert alignment.shape == expected_shape
        assert alignment[0] == "X"
        assert alignment[1] == "N"
        assert alignment[-1] == "N"

    def test_keys(self, alignments_store):
        # keys() must yield one entry per stored item, including the known key.
        all_keys = list(alignments_store.keys())
        assert len(all_keys) == len(alignments_store)
        assert "SRR11772659" in all_keys

    def test_in(self, alignments_store):
        # Membership works for a present key and rejects an absent one.
        known_present = "SRR11772659" in alignments_store
        assert known_present
        unknown_absent = "NOT_IN_STORE" not in alignments_store
        assert unknown_absent
| 63 | + |
| 64 | + |
10 | 65 | def test_get_gene_coordinates():
|
11 | 66 | d = core.get_gene_coordinates()
|
12 | 67 | assert len(d) == 11
|
@@ -66,6 +121,12 @@ def test_examples(self, a):
|
66 | 121 | with pytest.raises(ValueError):
|
67 | 122 | sa.decode_alignment(np.array(a))
|
68 | 123 |
|
| 124 | + def test_encode_real(self, alignments_store): |
| 125 | + h = alignments_store["SRR11772659"] |
| 126 | + a = sa.encode_alignment(h) |
| 127 | + assert a[0] == -1 |
| 128 | + assert a[-1] == -1 |
| 129 | + |
69 | 130 |
|
70 | 131 | class TestMasking:
|
71 | 132 | # Window size of 1 is weird because we have to have two or more
|
@@ -113,3 +174,20 @@ def test_bad_window_size(self, w):
|
113 | 174 | a = np.zeros(2, dtype=np.int8)
|
114 | 175 | with pytest.raises(ValueError):
|
115 | 176 | sa.mask_alignment(a, window_size=w)
|
| 177 | + |
| 178 | + |
class TestEncodeAndMask:
    """Pinned-value check of ``sa.encode_and_mask`` on a stored alignment."""

    def test_known(self, alignments_store):
        # Expected values are pinned for the "SRR11772659" entry of the
        # test data set.
        expected_composition = {
            "T": 9566,
            "A": 8894,
            "G": 5850,
            "C": 5472,
            "N": 121,
        }
        alignment = alignments_store["SRR11772659"]
        masked = sa.encode_and_mask(alignment)
        assert masked.original_base_composition == expected_composition
        assert masked.original_md5 == "e96feaa72c4f4baba73c2e147ede7502"
        # 133 sites are masked; check the count and both ends of the list.
        assert len(masked.masked_sites) == 133
        assert masked.masked_sites[0] == 1
        assert masked.masked_sites[-1] == 29903
0 commit comments